[antlr-interest] Fortran grammars

Andy Tripp atripp at comcast.net
Mon Jun 13 15:44:48 PDT 2005


I had started a F77 ANTLR grammar, but never finished.
This might make a decent starting point. I was going from the
HP F77 manual that came with their RTE-A operating system.
Andy

--------------------------------------------------------------

/** Fortran Recognizer
 *
 * Run 'java Main [-showtree] directory-full-of-java-files'
 *
 * [The -showtree option pops up a Swing frame that shows
 *  the AST constructed from the parser.]
 *
 * Run 'java Main <directory full of java files>'
 *
 * Contributing authors:
 *        Andy Tripp          atripp at comcast.net
 */

class FortranRecognizer extends Parser;
options {
    k = 2;                           // two token lookahead
    exportVocab=Fortran;                // Call its vocabulary "Fortran"
    codeGenMakeSwitchThreshold = 2;  // Some optimizations
    codeGenBitsetTestThreshold = 3;
    defaultErrorHandler = false;     // Don't generate parser error handlers
    buildAST = true;
}

tokens { DOT; MIL_STD_OCTAL; MIL_STD_HEX; NUM_REAL; NUM_BINARY;
ARRAY_ELEMENT_NAME; SUBSTRING_NAME;
}

executableProgram
    : topStatement
      NEWLINE
      statementList
      "END"
    ;

topStatement
    : programStatement
    | functionStatement
    | subroutineStatement
    | blockDataStatement
    ;

statementList
    :    statement
        (NEWLINE CONTINUATION statement )*
    ;

/* 6 and 7 */
statement
    : includeStatement
    | formatStatement
    | entryStatement
    | parameterStatement
    | implicitStatement
    | dataStatement
    | statementFunctionStatement

    | dimensionStatement
    | equivalenceStatement
    | commonStatement
    | typeStatement
    | externalStatement
    | intrinsicStatement
    | saveStatement
    | emaStatement

    | assignmentStatement
    | gotoStatement
    | ifStatement
    | elseStatement
    | endIfStatement
    | doStatement
    | endDoStatement
    | continueStatement
    | stopStatement
    | pauseStatement
    | writeStatement
    | printStatement
    | decodeStatement
    | encodeStatement
    | rewindStatement
    | backspaceStatement
    | endFileStatement
    | openStatement
    | closeStatement
    | inquireStatement
    | callStatement
    | returnStatement
    ;


/* not in syntax chart */
includeStatement
    : "INCLUDE" STRING_CONSTANT
    ;

/* 8 */
programStatement
    : "PROGRAM" SNAME
        ( LPAREN
          ( INTEGER_CONSTANT (COMMA! INTEGER_CONSTANT)* )?
          RPAREN )?
        ( COMMA PROCESSOR_STRING )*
    ;

/* 9, 11, and 14 */
entryStatement
    : "ENTRY" SNAME
      ( options { generateAmbigWarnings=false; }:
       (functionArgumentList)*
      | subroutineArgumentList
      )
    ;

/* 10 */
functionStatement
    :    (
          "INTEGER"
        | "REAL"
        | "DOUBLE PRECISION"
        | "COMPLEX"
        | "DOUBLE COMPLEX"
        | "LOGICAL"
        | "CHARACTER"
        )
        ( STAR lenSpecification )?
        "FUNCTION"
        ( COMMA PROCESSOR_STRING )?
    ;


/* 12 */
functionArgumentList
    : LPAREN
      ( SNAME ( COMMA SNAME)* )?
      RPAREN
    ;

/* 13 */
subroutineStatement
    :    "SUBROUTINE" SNAME
        (subroutineArgumentList)?
        ( COMMA PROCESSOR_STRING )?
    ;

/* 15 */
subroutineArgumentList
    : LPAREN
      ( (SNAME | STAR) ( COMMA (SNAME | STAR))* )?
      RPAREN
    ;

/* 16 */
blockDataStatement
    :    "BLOCK DATA" ( SNAME ) ?
    ;

/* 17 */
dimensionStatement
    :    "DIMENSION" arrayDeclarator ( COMMA arrayDeclarator )*
    ;

/* 18 */
arrayDeclarator
    : SNAME
      LPAREN
      arrayDeclarator2 (COMMA arrayDeclarator2)*
      (STAR)?
      RPAREN
    ;

arrayDeclarator2
    : (dimBoundExpression (COLON dimBoundExpression)? )?
    ;

/* 19,20 */
equivalenceStatement
    : "EQUIVALENCE"
      equivalenceStatement2 (COMMA equivalenceStatement2)*
    ;

equivalenceStatement2
    : LPAREN
      name (COMMA name)*
      RPAREN
    ;

/* 21 */
commonStatement
    : "COMMON"
      common2 (COMMA common2)*
    ;

common2
options { generateAmbigWarnings=false; }
    : (DIV SNAME DIV)?
      ( arrayDeclarator | SNAME)
      ( options { generateAmbigWarnings=false; }:
        COMMA (arrayDeclarator | SNAME) )*
    ;


/* 22 */
typeStatement
    :    ( "INTEGER"
        | "REAL"
        | "DOUBLE PRECISION"
        | "COMPLEX"
        | "DOUBLE COMPLEX"
        | "LOGICAL"
        )
        (STAR lenSpecification (COMMA)? )?
        type2 (COMMA type2)*

    | "CHARACTER" (STAR lenSpecification (COMMA)? )?
      type2 (STAR lenSpecification)?
      (COMMA type2 (STAR lenSpecification))?
    ;

type2
    : SNAME
    | arrayDeclarator
    ;

/* 23 */
implicitStatement
    : "IMPLICIT" ( "NONE" | implicit2 (COMMA implicit2)* )
    ;

implicit2
    : ( "INTEGER"
            | "REAL"
            | "DOUBLE PRECISION"
            | "COMPLEX"
            | "DOUBLE COMPLEX"
            | "LOGICAL"
            | "CHARACTER"
        )
        (STAR lenSpecification)?
        LPAREN
        // DASH_STRING (COMMA DASH_STRING)*
        SNAME (COMMA SNAME)*
        PAREN
    ;


/* 24 */
lenSpecification
    : ( LPAREN STAR RPAREN)
    | INTEGER_CONSTANT
    | ( LPAREN INTEGER_CONSTANT RPAREN)
    ;

/* 25 */
parameterStatement
    : "PARAMETER"
      LPAREN
      ( SNAME EQUALS constantExpression )
      ( COMMA SNAME EQUALS constantExpression )*
      RPAREN
    ;

/* 26 */
externalStatement
    : "EXTERNAL"
      SNAME         // procedure_name or block_data_subprogram_name
      (COMMA SNAME)*
    ;

/* 27 */
intrinsicStatement
    : "INTRINSIC"
      SNAME            // functionName
      (COMMA SNAME)*
    ;

/* 28 */
saveStatement
    : "SAVE"
      (SNAME | DIV SNAME DIV)
      ( COMMA (SNAME | DIV SNAME DIV) )*
    ;


/* 29 */
emaStatement
    : "EMA" SNAME (COMMA SNAME)*
    ;

/* 30 */
dataStatement
    : "DATA" data1 (COMMA data1)*
    ;

data1
    : data2 (COMMA data2)*
      DIV
      data3 (COMMA data3)*
      DIV
    ;

data2
    : name
    | dataImpliedDoList
    ;

data3
    : ( ( INTEGER_CONSTANT | SNAME) STAR )?
      ( constant | SNAME )
    ;

/* 31 */
dataImpliedDoList
    : LPAREN
      dataImplied2 (COMMA dataImplied2)*
      SNAME EQUALS
      arithmeticExpression (COMMA arithmeticExpression)*
      RPAREN
    ;

dataImplied2
    : name (COMMA dataImpliedDoList)*
    ;

/* 34 */
assignmentStatement
    : name EQUALS expression
    | "ASSIGN" label "TO" SNAME
    ;

/* 35 */
gotoStatement
    : "GO TO"
      (unconditionalGoto | computedGoto | assignedGoto)
    ;

/* 36 */
unconditionalGoto
    : INTEGER_CONSTANT
    ;

/* 37 */
computedGoto
    : LPAREN
      label (COMMA label)*
      RPAREN
      (COMMA)?
      arithmeticExpression
    ;

/* 38 */
assignedGoto
    : SNAME
      ((COMMA)? LPAREN label (COMMA label)* RPAREN)?
    ;

/* 39, 40, 42 */
ifStatement
    : "IF" LPAREN arithmeticExpression RPAREN
      (
          (label COMMA label COMMA label)
// TMP        | (statement | "THEN")
      )
    ;

/* 42, 43 */
elseStatement
    : "ELSE" (ifStatement)?
    ;

/* 44 */
endIfStatement
    : "END IF"
    ;

/* 45, 46, 47 */
doStatement
    : "DO" (label (COMMA)?)?
        (
            (SNAME EQUALS arithmeticExpression (COMMA 
arithmeticExpression)*)
            | ("WHILE" LPAREN arithmeticExpression RPAREN)
        )
    ;


/* 48 */
endDoStatement
    : "END DO"
    ;

/* 49 */
continueStatement
    : "CONTINUE"
    ;

/* 50 */
stopStatement
    : "STOP" (INTEGER_CONSTANT | characterExpression)?
    ;

/* 51 */
pauseStatement
    : "PAUSE" (INTEGER_CONSTANT | characterExpression)?
    ;

/* 52 */
writeStatement
    : "WRITE" LPAREN controlInfoList RPAREN (ioList)?
    ;

/* 53 */
readStatement
    : "READ"
        (
          options { generateAmbigWarnings=false; }:
          LPAREN controlInfoList RPAREN (ioList)?
        | (formatIdentifier (COMMA ioList)?)
        )
    ;

/* 54 */
printStatement
    : "PRINT" formatIdentifier (COMMA ioList)?
    ;

/* 55 */
decodeStatement
    : "DECODE" encodeDecode1
    ;
   
/* 56 */
encodeStatement
    : "ENCODE" encodeDecode1
    ;
   
encodeDecode1
    : LPAREN
      arithmeticExpression COMMA formatIdentifier COMMA SNAME      
      (encodeDecode2)*
      RPAREN
      (ioList)?
    ;
     
encodeDecode2
    : COMMA
        (
          ("ERR" EQUALS LABEL)
        | ("IOSTAT" EQUALS name)
        )
    ;


/* 57 */
controlInfoList
    : unitIdentifier (COMMA controlInfoList2)*
    ;

controlInfoList2
    : "FMT" EQUALS formatIdentifier
    | "UNIT" EQUALS unitIdentifier
    | "REC" EQUALS arithmeticExpression
    | "ZBUF" EQUALS name
    | "ZLEN" EQUALS arithmeticExpression
    | "END" EQUALS label
    | "ERR" EQUALS label
    | "IOSTAT" EQUALS name
    | formatIdentifier
    ;

/* 58 */
unitIdentifier
    : arithmeticExpression
        (
            // TMP: dont support for now: (APOSTROPHE arithmeticExpression)
          (COLON arithmeticExpression) (COLON arithmeticExpression)?
        )
    | STAR
    ;

/* 59 */
ioList
//    : ioList2 (COMMA ioList2)*
    : expression (COMMA expression)*
    ;

/* don't support for now:
ioList2
    : expression
    | ioImpliedDoList
    ;
*/

/* 60 */
/* don't support for now:
ioImpliedDoList
    : LPAREN ioList COMMA SNAME EQUALS arithmeticExpression
      (COMMA arithmeticExpression)+
      RPAREN
    ;
*/

/* 61 */
openStatement
    : "OPEN" LPAREN open2 (COMMA open2)* RPAREN
    ;

open2
    : "UNIT" EQUALS arithmeticExpression
    | "ERR" EQUALS characterExpression
    | "FILE" EQUALS characterExpression
    | "STATUS" EQUALS characterExpression
    | "ACCESS" EQUALS characterExpression
    | "FORM" EQUALS characterExpression
    | "RECL" EQUALS arithmeticExpression
    | "BLANK" EQUALS characterExpression
    | "MAXREC" EQUALS arithmeticExpression
    | "USE" EQUALS characterExpression
    | "NODE" EQUALS arithmeticExpression
    | "BUFSIZ" EQUALS arithmeticExpression
    | "IOSTAT" EQUALS name
    | arithmeticExpression
    ;

/* 62 */
closeStatement
    : "CLOSE" LPAREN close2 (COMMA close2)* RPAREN
    ;

close2
    : "UNIT" EQUALS arithmeticExpression
    | "ERR" EQUALS label
    | "STATUS" EQUALS characterExpression
    | "IOSTAT" EQUALS name
    | arithmeticExpression
    ;


/* 63 */
inquireStatement
    : "INQUIRE" LPAREN inquire2 (COMMA inquire2)* RPAREN
    ;

inquire2
    : "UNIT" EQUALS arithmeticExpression
    | "FILE" EQUALS characterExpression
    | "ERR" EQUALS label
    | (
        "IOSTAT" | "EXIST" | "OPENED" | "NUMBER" | "NAMED" | "NAME" |
        "ACCESS" | "SEQUENTIAL" | "DIRECT" | "FORM" | "FORMATTED" |
        "UNFORMATTED" | "RECL" | "NEXTREC" | "BLANK" | "MAXREC" |
        "USE" | "NODE"
      )  name
    ;

/* 64 */
backspaceStatement
    : "BACKSPACE"
    ;

/* 65 */
endFileStatement
    : "ENDFILE"
    ;

/* 66 */
rewindStatement
    : "REWIND"
        (
          // when we see LPAREN then arithmeticExpression, it's ok
          // to just match rewind2
          options { generateAmbigWarnings=false; }:
          ( LPAREN rewind2 (COMMA rewind2)* RPAREN )
          | arithmeticExpression
        )
    ;

rewind2
    : "UNIT" EQUALS arithmeticExpression
    | "ERR" EQUALS label
    | "IOSTAT" EQUALS name
    | arithmeticExpression
    ;


/* 67 */
formatIdentifier
    : label
    | characterExpression
    | STAR
    ;

/* 68, 69, 70, 71-79 */
formatStatement
    : FORMAT_STATEMENT
    ;

/* 80 */
statementFunctionStatement
    : LPAREN (SNAME (COMMA SNAME)* )? RPAREN EQUALS expression
    ;

/* 81 */
callStatement
    : "CALL" SNAME ( LPAREN (call2 (COMMA call2)*)? )? RPAREN
    ;

call2
    : expression
    | PROCESSOR_STRING  // longHollerithConst
    | STAR label
    ;

/* 82 */
returnStatement
    : "RETURN" arithmeticExpression
    ;

/* 83 */
functionReference
    : SNAME LPAREN (functionRef2 (COMMA functionRef2)*)? RPAREN
    ;

functionRef2
    : expression
    | PROCESSOR_STRING  // longHollerithConst
    ;

/* 84 */
expression
    : characterExpression (relOp characterExpression)*
    ;

/* 85 */
constantExpression
    : expression
    ;

/* 86, 87, 88 */
// integerExpression, logicalExpression, combined into arithmeticExpression
// combined into just expression

characterExpression
    : arithmeticExpression ( (SLASH_SLASH) arithmeticExpression )*
    ;

arithmeticExpression
    : additiveExpression ( (AND|OR|EQV|NEQV|XOR|EOR) additiveExpression )*
    ;

additiveExpression
    : multiplicativeExpression ( (PLUS | MINUS) multiplicativeExpression )*
    ;

multiplicativeExpression
    : exponentiationExpression ( (STAR | DIV) exponentiationExpression )*
    ;

exponentiationExpression
    : unaryExpression ( (EXPONENT) unaryExpression )*
    ;


unaryExpression
    options { generateAmbigWarnings=false; }// functionReference before name
    : PLUS unaryExpression
    | MINUS unaryExpression
    | NOT unaryExpression
    | functionReference
    | name
    | LPAREN arithmeticExpression RPAREN
    | logicalConstant
    | CHARACTER_CONSTANT
    ;

/* 91 */
dimBoundExpression
    : unaryExpression ((PLUS|MINUS|DIV|EXPONENT) unaryExpression )*
    ;

/* 92 */
// characterExpression moved above arithmeticExpression

/* No need for 93: characterConstExpression */

/* 96 */
// relationalExpression combined with expression

/* 97 */
relOp
    : LT
    | LE
    | EQ
    | NE
    | GT
    | GE
    ;


/* 98, 99 */
name
    : sname:SNAME
      (LPAREN (arithmeticExpression)?
              (
              (COMMA arithmeticExpression)+
                {#sname.setType(ARRAY_ELEMENT_NAME);}

              | COLON (arithmeticExpression)?
                {#sname.setType(SUBSTRING_NAME);}
            )
      RPAREN {#sname.setType(SUBSTRING_NAME);}
      )
    ;

/* 110 */
constant
    : logicalConstant
    | CHARACTER_CONSTANT
    | INTEGER_CONSTANT
    | NUM_REAL
    | MIL_STD_OCTAL
    | MIL_STD_HEX
    | NUM_BINARY
    ;

/* 121 */
logicalConstant
    : ".TRUE."
    | ".FALSE."
    ;


/* 123 */
label
    : INTEGER_CONSTANT
    ;

     
    
//----------------------------------------------------------------------------
    // The Fortran scanner
    
//----------------------------------------------------------------------------
class FortranLexer extends Lexer;

options {
    exportVocab=Fortran;      // call the vocabulary "Fortran"
    testLiterals=false;    // don't automatically test for literals
    k=4;                   // four characters of lookahead
    charVocabulary='\u0003'..'\u7FFE';
}



EQUALS            :    '='        ;
LPAREN            :    '('        ;
RPAREN            :    ')'        ;
COLON            :    ':'        ;
COMMA            :    ','        ;
DIV                :    '/'        ;
STAR            :    {getColumn() != 1}? '*'        ;
PLUS            :    '+'        ;
MINUS            :    '-'        ;
LNOT            :    ".NOT."        ;
LAND            :    ".AND."        ;
LOR                :    ".OR."    ;
EQV                :    ".EQV."    ;
NEQV            :    ".NEQV."    ;
XOR                :    ".XOR."    ;
EOR                :    ".EOR."    ;
EXP                :    {getColumn() != 1}? "**"        ;
SLASH_SLASH        :    "//"        ;
LT                :    ".LT."        ;
LE                :    ".LE."        ;
GT                :    ".GT."        ;
GE                :    ".GE."        ;
NE                :    ".NE."        ;
EQ                :    ".EQ."        ;
CONTINUATION    :    "&"        ;


// Whitespace -- ignored
WS    :    (    ' '
        |    '\t'
        |    '\f'
            // handle newlines
        |    (    options {generateAmbigWarnings=false;}
            :    "\r\n"  // Evil DOS
            |    '\r'    // Macintosh
            |    '\n'    // Unix (the right way)
            )
            { newline(); }
        )+
        { _ttype = Token.SKIP; }
    ;

// Single-line comments
END_COMMENT
    :    "!"
        (~('\n'|'\r'))* ('\n'|'\r'('\n')?)?
        {$setType(Token.SKIP); newline();}
    ;

SL_COMMENT
    :    {getColumn() == 1}? ('C' | '*')
        (~('\n'|'\r'))* ('\n'|'\r'('\n')?)?
        {$setType(Token.SKIP); newline();}
    ;

/* 122 */
CHARACTER_CONSTANT
    :    '\'' ( ~('\'') )* '\''
    ;

// string literals
STRING_LITERAL
    :    '"' (~('"'|'\\'|'\n'|'\r'))* '"'
    ;


LOGICAL_CONSTANT
    : ".TRUE." | ".FALSE."
    ;


// hexadecimal digit (again, note it's protected!)
protected
OCTAL_DIGIT
    :    ('0'..'7')
    ;

// hexadecimal digit (again, note it's protected!)
protected
HEX_DIGIT
    :    ('0'..'9'|'A'..'F'|'a'..'f')
    ;

// an identifier.  Note that testLiterals is set to true!  This means
// that after we match the rule, we look in the literals table to see
// if it's a literal or really an identifer
SNAME
    options {testLiterals=true;}
    :    {getColumn() != 1 && LA(2) != '\''}? 
('a'..'z'|'A'..'Z'|'_'|'$') ('a'..'z'|'A'..'Z'|'_'|'0'..'9'|'$'|'-')*
    ;


// a numeric literal
INTEGER_CONSTANT
    :   '.' {_ttype = DOT;}
            (    ('0'..'9')+ (EXPONENT)?
                {
                    _ttype = NUM_REAL;
                }
            )?

/* 32 */
    |    ( 'O' '\'' (OCTAL_DIGIT)+ '\'' ) { _ttype = MIL_STD_OCTAL; }
/* 33 */
    |    ( 'Z' '\'' (HEX_DIGIT)+ '\'' ) { _ttype = MIL_STD_HEX; }
    |    ('0'..'9')+
        (
            '.' ('0'..'9')* (EXPONENT)? { _ttype = NUM_REAL; }
        |    ('I'|'J')? { _ttype = INTEGER_CONSTANT; }
        |    ('B') { _ttype = NUM_BINARY; }
        )
    ;


// a couple protected methods to assist in matching floating point numbers
protected
EXPONENT
    :    ('e'|'E') ('+'|'-')? ('0'..'9')+
    |    ('d'|'D') ('+'|'-')? ('0'..'9')+
    ;

// for now, accept anything within parens:
FORMAT_STATEMENT
    : "FORMAT" '(' (~')')* '('
    ;

protected
NEWLINE
    : ('\r' | '\n' | '\r' '\n')
    ;





More information about the antlr-interest mailing list