[antlr-interest] Help with nondeterminism in cobol grammar
Jose Ventura
jose.ventura.roda at gmail.com
Wed May 10 04:37:00 PDT 2006
Skipped content of type multipart/alternative
-------------- next part --------------
/*header
{
package cobol;
}
*/
// Lexer for COBOL source text. It exports its token vocabulary as
// CobolLexVocab so that a parser can import it.
class CobolLex extends Lexer;
options
{
// One character of lookahead; rules below add syntactic/semantic predicates
// where that is not enough.
k=1;
charVocabulary = '\3'..'\377'; // usual character range
// Characters and literals are compared without regard to case.
caseSensitive=false;
caseSensitiveLiterals=false;
// Literal (keyword) testing is off by default; IDENT_NUM enables it per rule.
testLiterals=false;
exportVocab=CobolLexVocab;
}
// Token vocabulary of the lexer.
// NOTE(review): the comment below mentions a "keywordsTable" Hashtable that is
// not present in this grammar - confirm whether it still exists elsewhere.
tokens // MUST be kept in sync with "keywordsTable" Hashtable below!!
{
// Token types without literal text; they are assigned by lexer rule actions.
FIN_TODO;
NUM;
IDENT;
LIT_HEXADECIMAL;
LIT_PIC;
LIT_COPY;
FECHA;
IGUAL;
MAYOR_IGUAL;
MENOR_IGUAL;
NOT_REL;
// Keywords: each token name is bound to its literal text.
IDENTIFICATION="IDENTIFICATION";
ID="ID";
ENVIRONMENT="ENVIRONMENT";
DATA="DATA";
WORKING="WORKING-STORAGE";
LINKAGE="LINKAGE";
DIVISION="DIVISION";
PROGRAM_ID="PROGRAM-ID";
PROCEDURE="PROCEDURE";
SECTION="SECTION";
ACCEPT="ACCEPT";
ADD="ADD";
ALL="ALL";
ALSO="ALSO";
ALTER="ALTER";
AND="AND";
AT="AT";
BLANK="BLANK";
BY="BY";
CALL="CALL";
CANCEL="CANCEL";
CICS="CICS";
CLOSE="CLOSE";
COMPUTE="COMPUTE";
COMMAREA="COMMAREA";
COMMIT="COMMIT";
CONTINUE="CONTINUE";
CORR="CORR";
CORRESPONDING="CORRESPONDING";
DELETE="DELETE";
DELIMITED="DELIMITED";
DISPLAY="DISPLAY";
DIVIDE="DIVIDE";
ELSE="ELSE";
END="END";
END_ADD="END-ADD";
END_CALL="END-CALL";
END_COMPUTE="END-COMPUTE";
END_DIVIDE="END-DIVIDE";
END_EVALUATE="END-EVALUATE";
END_EXEC="END-EXEC";
END_IF="END-IF";
END_MULTIPLY="END-MULTIPLY";
END_PERFORM="END-PERFORM";
END_SEARCH="END-SEARCH";
END_STRING="END-STRING";
END_UNSTRING="END-UNSTRING";
END_SUBTRACT="END-SUBTRACT";
END_READ="END-READ";
END_WRITE="END-WRITE";
ENTRY="ENTRY";
EVALUATE="EVALUATE";
EXEC="EXEC";
EXIT="EXIT";
METHOD="METHOD";
PROGRAM="PROGRAM";
GIVING="GIVING";
GOBACK="GOBACK";
GO_TO="GO TO";
IF="IF";
IN="IN";
INITIALIZE="INITIALIZE";
INSPECT="INSPECT";
INTO="INTO";
INVOKE="INVOKE";
LINK="LINK";
MERGE="MERGE";
MOVE="MOVE";
MULTIPLY="MULTIPLY";
OF="OF";
OPEN="OPEN";
OR="OR";
OTHER="OTHER";
PERFORM="PERFORM";
POINTER="POINTER";
READ="READ";
RECEIVE="RECEIVE";
REDEFINES="REDEFINES";
RELEASE="RELEASE";
REPLACING="REPLACING";
TALLYING="TALLYING";
RETURN="RETURN";
REWRITE="REWRITE";
ROLLBACK="ROLLBACK";
SEARCH="SEARCH";
SEND="SEND";
SET="SET";
SQL="SQL";
SORT="SORT";
SPACE="SPACE";
SPACES="SPACES";
START="START";
STOP="STOP";
STRING="STRING";
SUBTRACT="SUBTRACT";
SYNCPOINT="SYNCPOINT";
TO="TO";
THEN="THEN";
UNSTRING="UNSTRING";
WRITE="WRITE";
ZERO="ZERO";
ZEROS="ZEROS";
ZEROES="ZEROES";
THROUGH="THROUGH";
THRU="THRU";
VARYING="VARYING";
UNTIL="UNTIL";
USING="USING";
WHEN="WHEN";
WITH="WITH";
TEST="TEST";
BEFORE="BEFORE";
AFTER="AFTER";
JUSTIFIED="JUSTIFIED";
JUST="JUST";
OCCURS="OCCURS";
ASCENDING="ASCENDING";
DESCENDING="DESCENDING";
KEY="KEY";
INDEXED="INDEXED";
PIC="PIC";
PICTURE="PICTURE";
USAGE="USAGE";
VALUE="VALUE";
VALUES="VALUES";
ARE="ARE";
RENAMES="RENAMES";
COPY="COPY";
// the field types BINARY..DISPLAY1 must stay contiguous
// see routine Programa.calcularLong()
BINARY="BINARY";
COMP="COMP";
COMP1="COMP-1";
COMP2="COMP-2";
COMP3="COMP-3";
COMP4="COMP-4";
COMPUTATIONAL="COMPUTATIONAL";
COMPUTATIONAL1="COMPUTATIONAL-1";
COMPUTATIONAL2="COMPUTATIONAL-2";
COMPUTATIONAL3="COMPUTATIONAL-3";
COMPUTATIONAL4="COMPUTATIONAL-4";
PACKED_DECIMAL="PACKED-DECIMAL";
DISPLAY1="DISPLAY-1";
// End of field types.
INDEX="INDEX";
CHARACTERS="CHARACTERS";
LEADING="LEADING";
FIRST="FIRST";
INITIAL="INITIAL";
CONVERTING="CONVERTING";
FOR="FOR";
UP="UP";
DOWN="DOWN";
NOT="NOT";
NULL="NULL";
ON="ON";
SIZE="SIZE";
ERROR="ERROR";
THAN="THAN";
GREATER="GREATER";
LESS="LESS";
EQUAL="EQUAL";
NUMERIC="NUMERIC";
ALPHABETIC="ALPHABETIC";
ALPHANUMERIC="ALPHANUMERIC";
POSITIVE="POSITIVE";
NEGATIVE="NEGATIVE";
FROM="FROM";
COUNT="COUNT";
// NOTE(review): the token NAME is spelled DELIMITIER while its literal text is
// "DELIMITER"; any grammar importing this vocabulary must use the name as
// spelled here.
DELIMITIER="DELIMITER";
INCLUDE="INCLUDE";
NEXT="NEXT";
SENTENCE="SENTENCE";
ROUNDED="ROUNDED";
WRITEQ="WRITEQ";
READQ="READQ";
TS="TS";
TD="TD";
QUEUE="QUEUE";
NUMITEMS="NUMITEMS";
ITEM="ITEM";
MAIN="MAIN";
LENGTH="LENGTH";
RESP="RESP";
NOHANDLE="NOHANDLE";
ERASE="ERASE";
DEPENDING="DEPENDING";
TIMES="TIMES";
DLI="DLI";
FILE="FILE";
FD="FD";
SD="SD";
EXTERNAL="EXTERNAL";
GLOBAL="GLOBAL";
BLOCK="BLOCK";
CONTAINS="CONTAINS";
RECORDS="RECORDS";
RECORD="RECORD";
LABEL="LABEL";
STANDARD="STANDARD";
OMITTED="OMITTED";
RECORDING="RECORDING";
MODE="MODE";
INPUT="INPUT";
OUTPUT="OUTPUT";
UPON="UPON";
NO="NO";
ADVANCING="ADVANCING";
DUPLICATES="DUPLICATES";
ORDER="ORDER";
SEQUENCE="SEQUENCE";
COLLATING="COLLATING";
}
{ // Start of the native (Java) code section
// True when the token most recently built by makeToken was PIC or PICTURE.
// Several lexer rules test this flag in their semantic predicates.
boolean ultPic=false;
// True after a SQL, CICS or DLI token, until one of the terminating token
// types listed in makeToken is seen.
boolean enExec=false;
/**
 * Builds the token for the rule that just matched, possibly changing its type.
 * While enExec is set, every token is retyped as IDENT except the listed
 * terminating types, which clear the flag instead. Outside of that mode a NOT
 * token is retyped through tipoNot(), and any other token sets enExec when it
 * is SQL, CICS or DLI. The ultPic flag is then updated from the original type
 * and the current file name is stored in the token.
 *
 * @param type token type decided by the lexer rule
 * @return the token created by the superclass with the adjusted type
 */
protected Token makeToken(int type)
{
// Use the superclass implementation...
Token result = null;
int tipo=type;
// while I don't have a parser for CICS and SQL DLI
if (enExec)
{
if (type==END_EXEC||
type==RETURN ||
type==RECEIVE ||
type==SEND ||
type==SYNCPOINT ||
type==LINK ||
type==WRITEQ ||
type==READQ ||
type==INCLUDE
)
{
enExec=false;
}
else
{
tipo=IDENT;
}
}
else
{
if (type==NOT)
tipo=tipoNot();
else
enExec=(type==SQL||type==CICS||type==DLI);
}
result = super.makeToken(tipo);
// Based on the original type, not on the adjusted one.
ultPic =(type==PIC||type==PICTURE);
// ... add the file name information
result.setFilename(super.getFilename());
// ... and return the token
return (result);
}
/**
 * Decides whether a NOT that was just matched is a plain NOT or the prefix of
 * a relational operator. It marks the input, skips spaces and looks for one of
 * the characters =, > or < or for one of the words greater, less or equal
 * followed by a space. The input is always rewound to the mark and the text
 * buffer is cut back to its previous length, so nothing is consumed.
 *
 * @return NOT_REL when one of those operators follows, NOT otherwise
 */
protected int tipoNot ()
{
int tipo = NOT;
int marca = mark();
int saveLong = text.length();
try
{
while ( LA(1)==' ')
{
try
{
match(' ');
}
catch (MismatchedCharException m)
{
System.out.println ("Error in space following NOT. "+m.getMessage());
}
}
switch (LA(1))
{
case '=':
match ("=");
tipo = NOT_REL;
break;
case '>':
match (">");
tipo = NOT_REL;
break;
case '<':
match ("<");
tipo = NOT_REL;
break;
// The word cases set NOT_REL first; a failed match falls into the
// MismatchedCharException handler below, which resets it to NOT.
case 'g':
tipo = NOT_REL;
match("greater ");
break;
case 'l':
tipo = NOT_REL;
match("less ");
break;
case 'e':
tipo = NOT_REL;
match("equal ");
break;
}
}
catch (CharStreamException c)
{
System.out.println ("Problem with input buffer when it analize the NOT.");
}
catch (MismatchedCharException m)
{
tipo = NOT;
}
// Undo the lookahead: restore the input position and the token text.
rewind (marca);
text.setLength(saveLong);
return (tipo);
}
}
// Comment: '@' up to (not including) the end of the line; skipped.
COMENTARIO:
'@' ( ~('\n'|'\r') )*
{$setType(Token.SKIP);}
;
// Opening parenthesis; only matched when the previous token was not PIC/PICTURE
// (after PIC/PICTURE, IDENT_NUM accepts parentheses as part of a LIT_PIC).
PAR_AB
: {!ultPic }? '('
;
// Closing parenthesis; same ultPic restriction as PAR_AB.
PAR_CE
: {!ultPic }? ')'
;
// Either a text delimited by "==" on both sides (typed LIT_COPY), or a single
// '=' (typed IGUAL). The syntactic predicate selects the first form when two
// '=' characters come next.
CAD_COPY
: ("==") => "==" ( ~('=') )* "=="
{ $setType(LIT_COPY); }
| '='
{ $setType(IGUAL); }
;
// Colon.
DOSPUNTOS
: ':'
;
// Plus sign, or a positive number when digits follow directly (typed NUM).
// Not matched right after PIC/PICTURE, where IDENT_NUM takes the '+' as part
// of a LIT_PIC.
// The ',' is only taken as part of the number when the character after it is
// a digit; otherwise the number ends before the comma and the comma is left
// for the COMA rule instead of raising a lexical error. This matches the
// guarded comma handling in MENOS and IDENT_NUM.
MAS
: {!ultPic }?
  '+' {$setType(MAS);}
  (
    (DIGITO)+ ({LA(2)>='0' && LA(2)<='9'}? ',' (DIGITO)+)?
    {$setType(NUM);}
  )?
;
// Minus sign, or a negative number when digits follow directly (typed NUM).
// Not matched right after PIC/PICTURE, where IDENT_NUM takes the '-' as part
// of a LIT_PIC.
// The ',' is only taken as part of the number when the character after it is
// a digit. The predicate tests the character range directly: DIGITO is a
// token-type constant, so comparing LA(2) against it does not test for a digit.
MENOS
: {!ultPic }?
  '-' {$setType(MENOS);}
  (
    (DIGITO)+ ({LA(2)>='0' && LA(2)<='9'}? ',' (DIGITO)+)?
    {$setType(NUM);}
  )?
;
// Period: typed FIN_TODO on its own, or NUM when digits follow directly.
// Not matched right after PIC/PICTURE.
PUNTO
: {!ultPic }?
'.' {$setType(FIN_TODO);}
(
(DIGITO)+ {_ttype=NUM;}
)?
;
// Comma: typed COMA on its own, or NUM when digits follow directly.
// Not matched right after PIC/PICTURE.
COMA
: {!ultPic }?
',' {_ttype=COMA;}
(
(DIGITO)+ {_ttype=NUM;}
)?
;
// Asterisk (multiplication); not matched right after PIC/PICTURE.
POR
: {!ultPic }? '*'
;
// Slash (division).
DIV
: '/'
;
// '>' (MAYOR) or '>=' (retyped MAYOR_IGUAL).
MAYOR
: '>'
('=' {_ttype=MAYOR_IGUAL;})?
;
// '<' (MENOR) or '<=' (retyped MENOR_IGUAL).
MENOR: '<'
('=' {_ttype=MENOR_IGUAL;})?
;
// Helper: one letter. Only lower case is listed; the lexer options set
// caseSensitive=false.
protected
LETRA
: 'a'..'z'
;
// Helper: one decimal digit.
protected
DIGITO
: '0'..'9'
;
// Helper: one line terminator in any of three conventions; calls newline().
protected
NL :
(
("\r\n") => "\r\n" // MS-DOS
| '\r' // MACINTOSH
| '\n' // UNIX
)
{newline(); }
;
// Whitespace: space, tab or line terminator.
BLANCO :
( ' '
| '\t'
| NL
)
{ $setType(Token.SKIP); } // Whitespace action: skip it
;
// Identifiers, keywords, numbers, dates, hexadecimal literals and PICTURE
// strings. testLiterals=true makes the matched text be looked up in the
// literals (keyword) table for this rule.
// Alternatives, in order:
//   1. after PIC/PICTURE (ultPic): a run of picture characters -> LIT_PIC;
//      a '.' is only included when the next character is not a space or
//      line terminator
//   2. x'....'                                   -> LIT_HEXADECIMAL
//   3. letter followed by letters/digits/-/_     -> IDENT (the word IS is skipped)
//   4. digits followed by a letter, '-' or '_'   -> IDENT
//   5. digits '/' digits-or-slashes              -> FECHA
//   6. digits, optionally ',' digits             -> NUM
//   7. '_' followed by letters/digits/_          -> IDENT
IDENT_NUM options {testLiterals=true;}
:{ ultPic }? ('+'|'-'|'$'|'b'|','|'e'|('0'..'9')|'x'|'s'|'v'|'z'|'*'|'('|')'|
{LA(2)!=' '&&LA(2)!='\n'&&LA(2)!='\r'}? '.')+
{$setType (LIT_PIC);}
|('x' '\'') => 'x' '\''( ~('\''|'\n'|'\r'|'\t') )* '\''
{$setType (LIT_HEXADECIMAL);}
| LETRA (LETRA|DIGITO|'-'|'_')*
{String s = $getText;
// The lexer matches without regard to case but the token text keeps the
// case of the input, so the noise word must be compared ignoring case.
if (s.equalsIgnoreCase("IS"))
$setType(Token.SKIP);
else
$setType(IDENT);
}
|((DIGITO)+ (LETRA|'-'|'_')) =>(DIGITO)+ (LETRA|'-'|'_') (LETRA|DIGITO|'-'|'_')*
{$setType (IDENT);}
|((DIGITO)+ '/' )=> (DIGITO)+ '/' (DIGITO|'/')+
{$setType (FECHA);}
| (DIGITO)+ ({LA(2)!=' '&&LA(2)!='\n'&&LA(2)!='\r'}? ',' (DIGITO)+)?
{$setType (NUM);}
| '_' (LETRA|DIGITO|'_')*
{$setType (IDENT);}
;
// String literal delimited by single or double quotes. The delimiters are
// dropped from the token text ('!'); the body may not contain the delimiter,
// a line terminator or a tab.
CADENA :
'\''!
( ~('\''|'\n'|'\r'|'\t') )*
'\''!
| '"'! ( ~('"'|'\n'|'\r'|'\t') )* '"'!
;
-------------- next part --------------
// Java imports copied into the generated parser source.
header
{
import java.util.*;
import antlr.CommonToken;
}
// Parser for COBOL statements. It builds an AST, reads the token vocabulary
// exported by the lexer (CobolLexVocab) and exports its own vocabulary as
// CobolCommandVocab.
class CobolCommand extends Parser;
options {
buildAST = true;
importVocab = CobolLexVocab;
exportVocab = CobolCommandVocab;
}
// Additional token types declared by the parser. The rules in this grammar use
// several of them as types of artificial AST root nodes created with #[...].
tokens {
/* tokens */
PROGRAMA;
SENTENCIAS;
PARRAFO;
PERFORM_PARRAFO;
PERFORM_INLINE;
CICS_GENERICO;
SQL_GENERICO;
CONDICION;
LIST_EXPRESIONES;
DECLARACION;
CAMPO;
MOD_TEXTO;
INDICE;
OPERADOR;
EXPRESION;
LENGTHOF;
}
// Statement list nested inside another statement. The tipo argument names the
// enclosing context (callers pass values such as "THEN" or "ELSE"); it is not
// used inside this rule.
sentencia_interna [String tipo]
: sentencia
;
// Zero or more statements, matched greedily.
sentencia
:
(options {greedy=true;}:c:comando) *
;
// One statement: dispatches to the rule of each supported verb. EXIT PROGRAM is
// tried through a syntactic predicate before plain EXIT. Tokens marked with '!'
// are left out of the AST.
comando
: comando_if
|comando_perform
|comando_call
|comando_move
|comando_unstring
|comando_inspect
|comando_add
|comando_evaluate
|comando_initialize
|comando_string
|comando_set
|comando_search
|comando_subtract
|comando_multiply
|comando_compute
|comando_divide
|comando_open
|comando_close
|comando_read
|comando_write
|comando_display
|comando_sort
|(EXIT PROGRAM) => EXIT PROGRAM!
|EXIT!
|CONTINUE!
|NEXT! SENTENCE!
|GOBACK
;
// IF statement: condition, THEN part, optional ELSE part and optional END-IF.
// IF becomes the AST root; ELSE and END-IF are dropped from the tree. The
// greedy options bind an ELSE / END-IF to the nearest IF.
comando_if
: IF^ condicion parte_then (options {greedy=true;}: ELSE! parte_else)? (options {greedy=true;}: END_IF!)?
;
// A condition; the resulting tree is wrapped under an artificial CONDICION node.
condicion //returns [String s]
: c:condicionOR
{## = #( #[CONDICION, "CONDICION"] ,##);}
;
// Comma-separated arithmetic expressions, wrapped under LIST_EXPRESIONES.
listaAritmetica
: expresionSuma
(COMA! expresionSuma)*
{## = #( #[LIST_EXPRESIONES, "LIST_EXPRESIONES"] ,##);}
;
// OR has the lowest precedence; each OR becomes a root.
condicionOR
: condicionAND (OR^ condicionAND)*
;
// AND binds tighter than OR.
condicionAND
: expresionRelacional (AND^ expresionRelacional)*
;
// Optional left operand followed by any number of (operator, operand) pairs,
// wrapped under an EXPRESION node. Both parts are optional in this rule.
expresionRelacional
: (expresionSuma)? (operador_relacional expresionSuma)*
{## = #( #[EXPRESION, "EXPRESION"] ,##);}
;
// Additive level: + and -.
expresionSuma
: expresionPor ( (MAS^|MENOS^) expresionPor)*
;
// Multiplicative level: * and /.
expresionPor
: expresionUnitaria ( (POR^|DIV^) expresionUnitaria)*
;
// Unary sign.
expresionUnitaria
: MENOS^ expresionUnitaria
| MAS^ expresionUnitaria
| expresionUnitariaNoMasMenos
;
// NOT prefix, or a primary optionally followed by a [NOT] class/sign test.
expresionUnitariaNoMasMenos
: NOT^ expresionUnitaria
| expresionPrimaria ((NOT)? (POSITIVE|NEGATIVE|NUMERIC|ALPHABETIC))?
;
// Field reference, literal or parenthesised condition (parentheses dropped).
expresionPrimaria
: campo
| literal
| PAR_AB! condicion PAR_CE!
;
// Relational operator in symbol or word form, optionally preceded by NOT_REL;
// wrapped under an OPERADOR node.
operador_relacional
: (NOT_REL )?
(GREATER (THAN)? (OR EQUAL (TO)?)?
|MAYOR_IGUAL
|MAYOR
|LESS (THAN)? (OR EQUAL (TO)?)?
|MENOR_IGUAL
|MENOR
|EQUAL (TO)?
|IGUAL
)
{## = #( #[OPERADOR, "OPERADOR"] ,##);}
;
// THEN branch of an IF: optional THEN keyword (dropped) and the statements.
parte_then
: (THEN!)? sentencia_interna ["THEN"]
;
// ELSE branch of an IF: the statements after ELSE.
parte_else
: sentencia_interna["ELSE"]
;
// END-IF terminator (dropped from the AST). Not referenced by the other rules
// shown in this grammar.
fin_if
: END_IF!
;
// PERFORM statement, in two forms:
//   - out-of-line: PERFORM paragraph [THRU|THROUGH paragraph] options.
//     The tree root is an artificial PERFORM_PARRAFO node whose text is the
//     paragraph name, or "first/last" when a THRU/THROUGH range is given.
//   - inline: PERFORM options statements END-PERFORM, rooted at PERFORM_INLINE.
// The syntactic predicate picks the out-of-line form when an IDENT follows.
comando_perform
{String p2 = "";}
: (PERFORM IDENT)=>
  PERFORM i1:IDENT!
  ((THROUGH!|THRU!) i2:IDENT!{p2=#i2.getText();})?
  perform_opc
  { // Test the content of p2; comparing String references with == is unreliable.
    String p = #i1.getText() + ((p2.length()==0)?"":"/"+p2);
    {## = #( #[PERFORM_PARRAFO, p] ,##);}
  }
| PERFORM {## = #( #[PERFORM_INLINE, "PERFORM_INLINE"] ,##);}
  perform_opc sentencia_interna ["PERFORM"] END_PERFORM!
;
// Optional PERFORM clauses, in this fixed order: TEST, VARYING, UNTIL.
perform_opc
: (test)? (perform_varying)? (perform_until)?
;
// VARYING field FROM start BY step; VARYING is the root.
perform_varying
: VARYING^ campo FROM (campo|literal) BY (campo|literal)
;
// UNTIL condition; UNTIL is the root.
perform_until
: UNTIL^ condicion
;
// [WITH] TEST BEFORE|AFTER. The rule-level '!' turns off automatic AST
// construction, so this clause contributes nothing to the tree.
test!
: (WITH)? TEST (BEFORE|AFTER)
;
// CALL target [USING ...]; the target is a field or a string literal.
comando_call
: CALL^ (campo|CADENA) (call_using)?
;
// USING followed by one or more fields.
call_using
: USING^ (campo)+
;
// MOVE [CORRESPONDING|CORR] source TO one or more targets, optionally
// separated by commas.
comando_move
: MOVE^ (CORRESPONDING!|CORR!)? (literal|campo) TO (campo (COMA!)?)+
;
// UNSTRING source [DELIMITED ...] INTO ... [POINTER ...] [TALLYING ...]
// [END-UNSTRING]. Note that END_UNSTRING is kept in the AST here.
comando_unstring
: UNSTRING^ campo (unstring_delimited)? unstring_into (unstring_with)? (unstring_tallying)?
(END_UNSTRING)?
;
// INTO followed by one or more targets, each with optional delimiter and
// count clauses.
unstring_into
: INTO^ ( campo (delimitador)? (contador)?)+
;
// DELIMITER [IN] field clause of an UNSTRING ... INTO target.
// The keyword is referenced through the token name that the lexer vocabulary
// actually declares, DELIMITIER (sic; its literal text is "DELIMITER").
// Referring to the undeclared name DELIMITER would make the parser define a
// separate token type that the lexer never produces, so the clause could
// never match.
// The rule-level '!' turns off automatic AST construction for this rule.
delimitador!
:DELIMITIER^ (IN!)? campo
;
// COUNT [IN] field clause of an UNSTRING ... INTO target.
contador
:COUNT^ (IN!)? campo
;
// DELIMITED [BY] [ALL] delimiter {OR [ALL] delimiter}. The rule-level '!'
// turns off automatic AST construction for this rule.
unstring_delimited!
: DELIMITED^ (BY!)? (ALL!)? (campo|literal|SIZE) (OR (ALL)? (campo|literal|SIZE))*
;
// [WITH] POINTER field; also used by comando_string.
unstring_with
: (WITH!)? POINTER^ campo
;
// TALLYING [IN] field.
unstring_tallying
: TALLYING^ (IN!)? campo
;
// STRING sources... INTO target [POINTER ...] [END-STRING].
comando_string
: STRING^ (lista_string)+ string_into (unstring_with)? (END_STRING!)?
;
// INTO target of a STRING statement.
string_into
: INTO^ lista_string
;
// One or more fields/literals with an optional DELIMITED [BY] clause; the
// whole DELIMITED clause is dropped from the AST.
lista_string
: (options {greedy=true;}:campo|literal)+ (DELIMITED! (BY!)? (campo!|literal!|SIZE!))?
;
// INSPECT field followed by one or more TALLYING / REPLACING / CONVERTING parts.
comando_inspect
: INSPECT^ campo (parte_tallying
|parte_replacing
|parte_converting)+
;
// TALLYING counter FOR {CHARACTERS | ALL|LEADING operand...}, each optionally
// limited by BEFORE/AFTER phrases.
parte_tallying
: TALLYING^ ( campo FOR ( (CHARACTERS (before_after)*| (ALL|LEADING) ((campo|literal) (before_after)*)+))+)+
;
// REPLACING {CHARACTERS BY x | ALL|LEADING|FIRST a BY b ...}, each optionally
// limited by BEFORE/AFTER phrases.
parte_replacing
: REPLACING^ ( (CHARACTERS BY (campo|literal) (before_after)*
|(ALL|LEADING|FIRST) (parte_by (before_after)*)+))+
;
// One or more "a BY b" pairs, optionally separated by commas.
parte_by
: un_by (options {greedy=true;}:(COMA!)? un_by)*
;
// A single "a BY b" pair.
un_by:
(campo|literal) BY (campo|literal)
;
// CONVERTING a TO b with optional BEFORE/AFTER phrases.
parte_converting
: CONVERTING^ (campo|literal) TO (campo|literal) (before_after)*
;
// BEFORE|AFTER [INITIAL] operand.
before_after
: (BEFORE|AFTER) (INITIAL)? (campo|literal)
;
// INITIALIZE one or more fields, with an optional REPLACING clause.
comando_initialize
: INITIALIZE^ (campo)+ (initialize_replacing)?
;
// REPLACING followed by one or more category replacements.
initialize_replacing
:REPLACING^ (tipo_dato )+
;
// ALPHABETIC|NUMERIC|ALPHANUMERIC [DATA] BY value; the category is the root.
tipo_dato
: (ALPHABETIC^|NUMERIC^|ALPHANUMERIC^) (DATA!)? BY (campo|literal)
;
// EVALUATE subject {ALSO subject} followed by one or more WHEN branches and a
// mandatory END-EVALUATE (dropped from the AST).
comando_evaluate
: EVALUATE^ objeto_evaluate (ALSO objeto_evaluate)* (subcomando_when)+ (END_EVALUATE!)
;
// WHEN {condition {ALSO condition} | OTHER} with optional statements; also
// used by comando_search.
subcomando_when
: WHEN^ ((condicion_when (ALSO condicion_when)*)|OTHER) (options {greedy=true;}:sentencia_interna ["WHEN"])?
;
// A WHEN condition, optionally a range "cond THRU cond".
condicion_when
: condicion (THRU condicion)?
;
// An EVALUATE subject, optionally a range with THROUGH/THRU (dropped).
objeto_evaluate
: (campo|literal) ((THROUGH!|THRU!) (campo|literal))?
;
// SET fields {TO | UP BY | DOWN BY} value. The rule-level '!' turns off
// automatic AST construction for this rule.
comando_set!
: SET^ (campo)+ (TO|(UP|DOWN) BY) (campo|literal)
;
// SEARCH table [VARYING field] [[AT] END statements] followed by one or more
// WHEN branches and an optional END-SEARCH.
comando_search
: SEARCH^ campo (VARYING^ campo)? ((AT!)? END^ sentencia_interna["AT-END"])?
(options {greedy=true;}:subcomando_when)+ (options {greedy=true;}:END_SEARCH!)?
;
// ADD operand TO targets [ROUNDED] [GIVING ...] [SIZE ERROR ...] [END-ADD].
comando_add
: ADD^ (literal|campo)
TO (campo (ROUNDED)?)+
(giving)?
(size_error)?
(options {greedy=true;}:END_ADD!)?
;
// COMPUTE field [ROUNDED] = expression. The expression is optional in this rule.
comando_compute
: COMPUTE^ campo (ROUNDED)? IGUAL (expresionSuma)?
;
// SUBTRACT a FROM b [ROUNDED] [GIVING ...] [SIZE ERROR ...] [END-SUBTRACT].
// The rule-level '!' turns off automatic AST construction for this rule.
comando_subtract!
: SUBTRACT^ (campo|literal) FROM (campo|literal) (ROUNDED)? (giving)? (size_error)? (options {greedy=true;}:END_SUBTRACT!)?
;
// MULTIPLY a BY b [ROUNDED] [GIVING ...] [SIZE ERROR ...] [END-MULTIPLY].
// Unlike END_ADD and END_SUBTRACT, END_MULTIPLY is kept in the AST.
comando_multiply
: MULTIPLY^ (campo|literal) BY (campo|literal) (ROUNDED)? (giving)? (size_error)? (options {greedy=true;}:END_MULTIPLY)?
;
// DIVIDE a INTO|BY b [ROUNDED] [GIVING ...] [SIZE ERROR ...] [END-DIVIDE].
// END_DIVIDE is kept in the AST.
comando_divide
: DIVIDE^ (campo|literal) (INTO|BY) (campo|literal) (ROUNDED)? (giving)? (size_error)? (options {greedy=true;}:END_DIVIDE)?
;
// [NOT] [ON] SIZE ERROR statements. Both the plain and the NOT form pass the
// same "SIZE-ERROR" tag; the commented-out alternatives below kept them apart.
size_error
: (NOT)? (ON!)? SIZE ERROR! sentencia_interna["SIZE-ERROR"]
// : SIZE ERROR! sentencia_interna["SIZE-ERROR"]
// | NOT (ON!)? SIZE ERROR! sentencia_interna["NOT-SIZE-ERROR"]
;
// GIVING field [ROUNDED].
giving
: GIVING campo (ROUNDED)?
;
// OPEN followed by one or more "INPUT|OUTPUT file" pairs.
comando_open
: OPEN^ ((INPUT|OUTPUT) IDENT)+
;
// CLOSE followed by one or more file names.
comando_close
: CLOSE^ (IDENT)+
;
// READ file [INTO field] [AT END ...] [NOT AT END ...] [END-READ].
comando_read
: READ^ IDENT (read_into)? (read_end)? (read_no_end)? (options {greedy=true;}:END_READ!)?
;
// NEXT [RECORD] or RECORD. Not referenced by the other rules shown in this
// grammar.
next_record
: NEXT (RECORD)?
| RECORD
;
// INTO field clause of READ.
read_into
: INTO^ campo
;
// AT END statements; AT is mandatory here and dropped from the AST.
read_end
: AT! END sentencia_interna["END"]
;
// NOT AT END statements.
read_no_end
: NOT AT! END sentencia_interna["NOT-END"]
;
// WRITE record FROM identifier [END-WRITE]; the FROM part is mandatory in
// this rule.
comando_write
: WRITE^ campo FROM! IDENT (options {greedy=true;}:END_WRITE!)?
;
// DISPLAY one or more operands [UPON name] [[WITH] NO ADVANCING].
comando_display
: DISPLAY^ (campo|literal)+ (display_upon)? (display_with)?
;
// UPON name clause of DISPLAY.
display_upon
: UPON^ IDENT
;
// [WITH] NO ADVANCING clause of DISPLAY; only NO stays in the AST.
display_with
: (WITH!)? NO ADVANCING!
;
// SORT file [ON] ASCENDING|DESCENDING [KEY] key
//      [[WITH] DUPLICATES [IN [ORDER]]]
//      [[COLLATING] SEQUENCE alphabet]
//      {USING file  | INPUT PROCEDURE name [THRU|THROUGH name]}
//      {GIVING file | OUTPUT PROCEDURE name [THRU|THROUGH name]}
// The noise words KEY, WITH and IN were written in parentheses without the '?'
// suffix, which made them mandatory; they are optional words in COBOL, so they
// are now optional here. Everything the previous rule accepted is still
// accepted. Tokens marked with '!' are left out of the AST.
comando_sort
: SORT IDENT (ON!)? (ASCENDING|DESCENDING) (KEY!)? IDENT
  ( (WITH!)? DUPLICATES (IN! (ORDER!)?)? )?
  ( (COLLATING!)? SEQUENCE IDENT)?
  ( (USING IDENT)
  |(INPUT PROCEDURE! IDENT ((THRU|THROUGH) IDENT)?)
  )
  ( (GIVING IDENT)
  |(OUTPUT PROCEDURE! IDENT ((THRU|THROUGH) IDENT)?)
  )
;
// Literal operand: number, string, a ZERO form, SPACE/SPACES, hexadecimal
// literal, NULL, or "LENGTH OF field" (the LENGTH root node is retyped to
// LENGTHOF and OF is dropped).
literal
: NUM
| CADENA
| cero
| SPACE
| SPACES
| LIT_HEXADECIMAL
| NULL
| LENGTH^ {##.setType(LENGTHOF);} OF! campo
;
// Any of the three spellings of the ZERO figurative constant.
cero
: ZERO
| ZEROS
| ZEROES
;
// Field reference: a qualified name followed by any number of subscript or
// reference-modification groups.
campo
: in_of ( options {greedy=true;}:modif_campo)*
;
// Name optionally qualified with IN/OF (dropped); each qualifying IDENT
// becomes the new root.
in_of
: IDENT ( options {greedy=true;}:( IN!|OF!) IDENT^)*
;
// Parenthesised modifier after a field name, in two forms:
//   - "( start : [length] )", wrapped under a MOD_TEXTO node; chosen by the
//     syntactic predicate when a ':' follows the first expression
//   - "( expr {, expr} )", wrapped under an INDICE node
// Parentheses and the colon are dropped from the AST.
// Each tree-construction action ends with ';' so that the Java statement it
// generates is terminated, as in the other "## = ..." actions of this grammar.
modif_campo
: (PAR_AB expresionSuma DOSPUNTOS) => PAR_AB! expresionSuma DOSPUNTOS!
  (expresionSuma)?PAR_CE!
  {##= #( #[MOD_TEXTO,"MOD_TEXTO"],##);}
| PAR_AB! listaAritmetica PAR_CE!
  {##= #( #[INDICE,"INDICE"],##);}
;
// Terminating period (FIN_TODO). The rule-level '!' turns off automatic AST
// construction. Not referenced by the other rules shown in this grammar.
punto_fin!
: FIN_TODO
;
More information about the antlr-interest
mailing list