[antlr-interest] Fortran grammars
Andy Tripp
atripp at comcast.net
Mon Jun 13 15:44:48 PDT 2005
I had started a F77 ANTLR grammar, but never finished.
This might make a decent starting point. I was going from the
HP F77 manual that came with their RTE-A operating system.
Andy
--------------------------------------------------------------
/** Fortran Recognizer
*
* Run 'java Main [-showtree] directory-full-of-java-files'
*
* [The -showtree option pops up a Swing frame that shows
* the AST constructed from the parser.]
*
* Run 'java Main <directory full of java files>'
*
* Contributing authors:
* Andy Tripp atripp at comcast.net
*/
class FortranRecognizer extends Parser;
options {
k = 2; // two token lookahead
exportVocab=Fortran; // Call its vocabulary "Fortran"
codeGenMakeSwitchThreshold = 2; // Some optimizations
codeGenBitsetTestThreshold = 3;
defaultErrorHandler = false; // Don't generate parser error handlers
buildAST = true;
}
tokens { DOT; MIL_STD_OCTAL; MIL_STD_HEX; NUM_REAL; NUM_BINARY;
ARRAY_ELEMENT_NAME; SUBSTRING_NAME;
}
executableProgram
: topStatement
NEWLINE
statementList
"END"
;
topStatement
: programStatement
| functionStatement
| subroutineStatement
| blockDataStatement
;
statementList
: statement
(NEWLINE CONTINUATION statement )*
;
/* 6 and 7 */
statement
: includeStatement
| formatStatement
| entryStatement
| parameterStatement
| implicitStatement
| dataStatement
| statementFunctionStatement
| dimensionStatement
| equivalenceStatement
| commonStatement
| typeStatement
| externalStatement
| intrinsicStatement
| saveStatement
| emaStatement
| assignmentStatement
| gotoStatement
| ifStatement
| elseStatement
| endIfStatement
| doStatement
| endDoStatement
| continueStatement
| stopStatement
| pauseStatement
| writeStatement
| printStatement
| decodeStatement
| encodeStatement
| rewindStatement
| backspaceStatement
| endFileStatement
| openStatement
| closeStatement
| inquireStatement
| callStatement
| returnStatement
;
/* not in syntax chart */
includeStatement
: "INCLUDE" STRING_CONSTANT
;
/* 8 */
programStatement
: "PROGRAM" SNAME
( LPAREN
( INTEGER_CONSTANT (COMMA! INTEGER_CONSTANT)* )?
RPAREN )?
( COMMA PROCESSOR_STRING )*
;
/* 9, 11, and 14 */
entryStatement
: "ENTRY" SNAME
( options { generateAmbigWarnings=false; }:
(functionArgumentList)*
| subroutineArgumentList
)
;
/* 10 */
functionStatement
: (
"INTEGER"
| "REAL"
| "DOUBLE PRECISION"
| "COMPLEX"
| "DOUBLE COMPLEX"
| "LOGICAL"
| "CHARACTER"
)
( STAR lenSpecification )?
"FUNCTION"
( COMMA PROCESSOR_STRING )?
;
/* 12 */
functionArgumentList
: LPAREN
( SNAME ( COMMA SNAME)* )?
RPAREN
;
/* 13 */
subroutineStatement
: "SUBROUTINE" SNAME
(subroutineArgumentList)?
( COMMA PROCESSOR_STRING )?
;
/* 15 */
subroutineArgumentList
: LPAREN
( (SNAME | STAR) ( COMMA (SNAME | STAR))* )?
RPAREN
;
/* 16 */
blockDataStatement
: "BLOCK DATA" ( SNAME ) ?
;
/* 17 */
dimensionStatement
: "DIMENSION" arrayDeclarator ( COMMA arrayDeclarator )*
;
/* 18 */
arrayDeclarator
: SNAME
LPAREN
arrayDeclarator2 (COMMA arrayDeclarator2)*
(STAR)?
RPAREN
;
arrayDeclarator2
: (dimBoundExpression (COLON dimBoundExpression)? )?
;
/* 19,20 */
equivalenceStatement
: "EQUIVALENCE"
equivalenceStatement2 (COMMA equivalenceStatement2)*
;
equivalenceStatement2
: LPAREN
name (COMMA name)*
RPAREN
;
/* 21 */
commonStatement
: "COMMON"
common2 (COMMA common2)*
;
common2
options { generateAmbigWarnings=false; }
: (DIV SNAME DIV)?
( arrayDeclarator | SNAME)
( options { generateAmbigWarnings=false; }:
COMMA (arrayDeclarator | SNAME) )*
;
/* 22 */
typeStatement
: ( "INTEGER"
| "REAL"
| "DOUBLE PRECISION"
| "COMPLEX"
| "DOUBLE COMPLEX"
| "LOGICAL"
)
(STAR lenSpecification (COMMA)? )?
type2 (COMMA type2)*
| "CHARACTER" (STAR lenSpecification (COMMA)? )?
type2 (STAR lenSpecification)?
(COMMA type2 (STAR lenSpecification))?
;
type2
: SNAME
| arrayDeclarator
;
/* 23 */
implicitStatement
: "IMPLICIT" ( "NONE" | implicit2 (COMMA implicit2)* )
;
implicit2
: ( "INTEGER"
| "REAL"
| "DOUBLE PRECISION"
| "COMPLEX"
| "DOUBLE COMPLEX"
| "LOGICAL"
| "CHARACTER"
)
(STAR lenSpecification)?
LPAREN
// DASH_STRING (COMMA DASH_STRING)*
SNAME (COMMA SNAME)*
PAREN
;
/* 24 */
lenSpecification
: ( LPAREN STAR RPAREN)
| INTEGER_CONSTANT
| ( LPAREN INTEGER_CONSTANT RPAREN)
;
/* 25 */
parameterStatement
: "PARAMETER"
LPAREN
( SNAME EQUALS constantExpression )
( COMMA SNAME EQUALS constantExpression )*
RPAREN
;
/* 26 */
externalStatement
: "EXTERNAL"
SNAME // procedure_name or block_data_subprogram_name
(COMMA SNAME)*
;
/* 27 */
intrinsicStatement
: "INTRINSIC"
SNAME // functionName
(COMMA SNAME)*
;
/* 28 */
saveStatement
: "SAVE"
(SNAME | DIV SNAME DIV)
( COMMA (SNAME | DIV SNAME DIV) )*
;
/* 29 */
emaStatement
: "EMA" SNAME (COMMA SNAME)*
;
/* 30 */
dataStatement
: "DATA" data1 (COMMA data1)*
;
data1
: data2 (COMMA data2)*
DIV
data3 (COMMA data3)*
DIV
;
data2
: name
| dataImpliedDoList
;
data3
: ( ( INTEGER_CONSTANT | SNAME) STAR )?
( constant | SNAME )
;
/* 31 */
dataImpliedDoList
: LPAREN
dataImplied2 (COMMA dataImplied2)*
SNAME EQUALS
arithmeticExpression (COMMA arithmeticExpression)*
RPAREN
;
dataImplied2
: name (COMMA dataImpliedDoList)*
;
/* 34 */
assignmentStatement
: name EQUALS expression
| "ASSIGN" label "TO" SNAME
;
/* 35 */
gotoStatement
: "GO TO"
(unconditionalGoto | computedGoto | assignedGoto)
;
/* 36 */
unconditionalGoto
: INTEGER_CONSTANT
;
/* 37 */
computedGoto
: LPAREN
label (COMMA label)*
RPAREN
(COMMA)?
arithmeticExpression
;
/* 38 */
assignedGoto
: SNAME
((COMMA)? LPAREN label (COMMA label)* RPAREN)?
;
/* 39, 40, 42 */
ifStatement
: "IF" LPAREN arithmeticExpression RPAREN
(
(label COMMA label COMMA label)
// TMP | (statement | "THEN")
)
;
/* 42, 43 */
elseStatement
: "ELSE" (ifStatement)?
;
/* 44 */
endIfStatement
: "END IF"
;
/* 45, 46, 47 */
doStatement
: "DO" (label (COMMA)?)?
(
(SNAME EQUALS arithmeticExpression (COMMA
arithmeticExpression)*)
| ("WHILE" LPAREN arithmeticExpression RPAREN)
)
;
/* 48 */
endDoStatement
: "END DO"
;
/* 49 */
continueStatement
: "CONTINUE"
;
/* 50 */
stopStatement
: "STOP" (INTEGER_CONSTANT | characterExpression)?
;
/* 51 */
pauseStatement
: "PAUSE" (INTEGER_CONSTANT | characterExpression)?
;
/* 52 */
writeStatement
: "WRITE" LPAREN controlInfoList RPAREN (ioList)?
;
/* 53 */
readStatement
: "READ"
(
options { generateAmbigWarnings=false; }:
LPAREN controlInfoList RPAREN (ioList)?
| (formatIdentifier (COMMA ioList)?)
)
;
/* 54 */
printStatement
: "PRINT" formatIdentifier (COMMA ioList)?
;
/* 55 */
decodeStatement
: "DECODE" encodeDecode1
;
/* 56 */
encodeStatement
: "ENCODE" encodeDecode1
;
encodeDecode1
: LPAREN
arithmeticExpression COMMA formatIdentifier COMMA SNAME
(encodeDecode2)*
RPAREN
(ioList)?
;
encodeDecode2
: COMMA
(
("ERR" EQUALS LABEL)
| ("IOSTAT" EQUALS name)
)
;
/* 57 */
controlInfoList
: unitIdentifier (COMMA controlInfoList2)*
;
controlInfoList2
: "FMT" EQUALS formatIdentifier
| "UNIT" EQUALS unitIdentifier
| "REC" EQUALS arithmeticExpression
| "ZBUF" EQUALS name
| "ZLEN" EQUALS arithmeticExpression
| "END" EQUALS label
| "ERR" EQUALS label
| "IOSTAT" EQUALS name
| formatIdentifier
;
/* 58 */
unitIdentifier
: arithmeticExpression
(
// TMP: dont support for now: (APOSTROPHE arithmeticExpression)
(COLON arithmeticExpression) (COLON arithmeticExpression)?
)
| STAR
;
/* 59 */
ioList
// : ioList2 (COMMA ioList2)*
: expression (COMMA expression)*
;
/* don't support for now:
ioList2
: expression
| ioImpliedDoList
;
*/
/* 60 */
/* don't support for now:
ioImpliedDoList
: LPAREN ioList COMMA SNAME EQUALS arithmeticExpression
(COMMA arithmeticExpression)+
RPAREN
;
*/
/* 61 */
openStatement
: "OPEN" LPAREN open2 (COMMA open2)* RPAREN
;
open2
: "UNIT" EQUALS arithmeticExpression
| "ERR" EQUALS characterExpression
| "FILE" EQUALS characterExpression
| "STATUS" EQUALS characterExpression
| "ACCESS" EQUALS characterExpression
| "FORM" EQUALS characterExpression
| "RECL" EQUALS arithmeticExpression
| "BLANK" EQUALS characterExpression
| "MAXREC" EQUALS arithmeticExpression
| "USE" EQUALS characterExpression
| "NODE" EQUALS arithmeticExpression
| "BUFSIZ" EQUALS arithmeticExpression
| "IOSTAT" EQUALS name
| arithmeticExpression
;
/* 62 */
closeStatement
: "CLOSE" LPAREN close2 (COMMA close2)* RPAREN
;
close2
: "UNIT" EQUALS arithmeticExpression
| "ERR" EQUALS label
| "STATUS" EQUALS characterExpression
| "IOSTAT" EQUALS name
| arithmeticExpression
;
/* 63 */
inquireStatement
: "INQUIRE" LPAREN inquire2 (COMMA inquire2)* RPAREN
;
inquire2
: "UNIT" EQUALS arithmeticExpression
| "FILE" EQUALS characterExpression
| "ERR" EQUALS label
| (
"IOSTAT" | "EXIST" | "OPENED" | "NUMBER" | "NAMED" | "NAME" |
"ACCESS" | "SEQUENTIAL" | "DIRECT" | "FORM" | "FORMATTED" |
"UNFORMATTED" | "RECL" | "NEXTREC" | "BLANK" | "MAXREC" |
"USE" | "NODE"
) name
;
/* 64 */
backspaceStatement
: "BACKSPACE"
;
/* 65 */
endFileStatement
: "ENDFILE"
;
/* 66 */
rewindStatement
: "REWIND"
(
// when we see LPAREN then arithmeticExpression, it's ok
// to just match rewind2
options { generateAmbigWarnings=false; }:
( LPAREN rewind2 (COMMA rewind2)* RPAREN )
| arithmeticExpression
)
;
rewind2
: "UNIT" EQUALS arithmeticExpression
| "ERR" EQUALS label
| "IOSTAT" EQUALS name
| arithmeticExpression
;
/* 67 */
formatIdentifier
: label
| characterExpression
| STAR
;
/* 68, 69, 70, 71-79 */
formatStatement
: FORMAT_STATEMENT
;
/* 80 */
statementFunctionStatement
: LPAREN (SNAME (COMMA SNAME)* )? RPAREN EQUALS expression
;
/* 81 */
callStatement
: "CALL" SNAME ( LPAREN (call2 (COMMA call2)*)? )? RPAREN
;
call2
: expression
| PROCESSOR_STRING // longHollerithConst
| STAR label
;
/* 82 */
returnStatement
: "RETURN" arithmeticExpression
;
/* 83 */
functionReference
: SNAME LPAREN (functionRef2 (COMMA functionRef2)*)? RPAREN
;
functionRef2
: expression
| PROCESSOR_STRING // longHollerithConst
;
/* 84 */
expression
: characterExpression (relOp characterExpression)*
;
/* 85 */
constantExpression
: expression
;
/* 86, 87, 88 */
// integerExpression, logicalExpression, combined into arithmeticExpression
// combined into just expression
characterExpression
: arithmeticExpression ( (SLASH_SLASH) arithmeticExpression )*
;
arithmeticExpression
: additiveExpression ( (AND|OR|EQV|NEQV|XOR|EOR) additiveExpression )*
;
additiveExpression
: multiplicativeExpression ( (PLUS | MINUS) multiplicativeExpression )*
;
multiplicativeExpression
: exponentiationExpression ( (STAR | DIV) exponentiationExpression )*
;
exponentiationExpression
: unaryExpression ( (EXPONENT) unaryExpression )*
;
unaryExpression
options { generateAmbigWarnings=false; }// functionReference before name
: PLUS unaryExpression
| MINUS unaryExpression
| NOT unaryExpression
| functionReference
| name
| LPAREN arithmeticExpression RPAREN
| logicalConstant
| CHARACTER_CONSTANT
;
/* 91 */
dimBoundExpression
: unaryExpression ((PLUS|MINUS|DIV|EXPONENT) unaryExpression )*
;
/* 92 */
// characterExpression moved above arithmeticExpression
/* No need for 93: characterConstExpression */
/* 96 */
// relationalExpression combined with expression
/* 97 */
relOp
: LT
| LE
| EQ
| NE
| GT
| GE
;
/* 98, 99 */
name
: sname:SNAME
(LPAREN (arithmeticExpression)?
(
(COMMA arithmeticExpression)+
{#sname.setType(ARRAY_ELEMENT_NAME);}
| COLON (arithmeticExpression)?
{#sname.setType(SUBSTRING_NAME);}
)
RPAREN {#sname.setType(SUBSTRING_NAME);}
)
;
/* 110 */
constant
: logicalConstant
| CHARACTER_CONSTANT
| INTEGER_CONSTANT
| NUM_REAL
| MIL_STD_OCTAL
| MIL_STD_HEX
| NUM_BINARY
;
/* 121 */
logicalConstant
: ".TRUE."
| ".FALSE."
;
/* 123 */
label
: INTEGER_CONSTANT
;
//----------------------------------------------------------------------------
// The Fortran scanner
//----------------------------------------------------------------------------
class FortranLexer extends Lexer;
options {
exportVocab=Fortran; // call the vocabulary "Fortran"
testLiterals=false; // don't automatically test for literals
k=4; // four characters of lookahead
charVocabulary='\u0003'..'\u7FFE';
}
EQUALS : '=' ;
LPAREN : '(' ;
RPAREN : ')' ;
COLON : ':' ;
COMMA : ',' ;
DIV : '/' ;
STAR : {getColumn() != 1}? '*' ;
PLUS : '+' ;
MINUS : '-' ;
LNOT : ".NOT." ;
LAND : ".AND." ;
LOR : ".OR." ;
EQV : ".EQV." ;
NEQV : ".NEQV." ;
XOR : ".XOR." ;
EOR : ".EOR." ;
EXP : {getColumn() != 1}? "**" ;
SLASH_SLASH : "//" ;
LT : ".LT." ;
LE : ".LE." ;
GT : ".GT." ;
GE : ".GE." ;
NE : ".NE." ;
EQ : ".EQ." ;
CONTINUATION : "&" ;
// Whitespace -- ignored
WS : ( ' '
| '\t'
| '\f'
// handle newlines
| ( options {generateAmbigWarnings=false;}
: "\r\n" // Evil DOS
| '\r' // Macintosh
| '\n' // Unix (the right way)
)
{ newline(); }
)+
{ _ttype = Token.SKIP; }
;
// Single-line comments
END_COMMENT
: "!"
(~('\n'|'\r'))* ('\n'|'\r'('\n')?)?
{$setType(Token.SKIP); newline();}
;
SL_COMMENT
: {getColumn() == 1}? ('C' | '*')
(~('\n'|'\r'))* ('\n'|'\r'('\n')?)?
{$setType(Token.SKIP); newline();}
;
/* 122 */
CHARACTER_CONSTANT
: '\'' ( ~('\'') )* '\''
;
// string literals
STRING_LITERAL
: '"' (~('"'|'\\'|'\n'|'\r'))* '"'
;
LOGICAL_CONSTANT
: ".TRUE." | ".FALSE."
;
// hexadecimal digit (again, note it's protected!)
protected
OCTAL_DIGIT
: ('0'..'7')
;
// hexadecimal digit (again, note it's protected!)
protected
HEX_DIGIT
: ('0'..'9'|'A'..'F'|'a'..'f')
;
// an identifier. Note that testLiterals is set to true! This means
// that after we match the rule, we look in the literals table to see
// if it's a literal or really an identifer
SNAME
options {testLiterals=true;}
: {getColumn() != 1 && LA(2) != '\''}?
('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$'|'-')*
;
// a numeric literal
INTEGER_CONSTANT
: '.' {_ttype = DOT;}
( ('0'..'9')+ (EXPONENT)?
{
_ttype = NUM_REAL;
}
)?
/* 32 */
| ( 'O' '\'' (OCTAL_DIGIT)+ '\'' ) { _ttype = MIL_STD_OCTAL; }
/* 33 */
| ( 'Z' '\'' (HEX_DIGIT)+ '\'' ) { _ttype = MIL_STD_HEX; }
| ('0'..'9')+
(
'.' ('0'..'9')* (EXPONENT)? { _ttype = NUM_REAL; }
| ('I'|'J')? { _ttype = INTEGER_CONSTANT; }
| ('B') { _ttype = NUM_BINARY; }
)
;
// a couple protected methods to assist in matching floating point numbers
protected
EXPONENT
: ('e'|'E') ('+'|'-')? ('0'..'9')+
| ('d'|'D') ('+'|'-')? ('0'..'9')+
;
// for now, accept anything within parens:
FORMAT_STATEMENT
: "FORMAT" '(' (~')')* '('
;
protected
NEWLINE
: ('\r' | '\n' | '\r' '\n')
;
More information about the antlr-interest
mailing list