[antlr-interest] C Target: setUcaseLA bug?
Brian Catlin
BrianC at sannas.org
Wed Apr 14 19:54:19 PDT 2010
Is setUcaseLA known to work in ANTLR3c-3.2? At the beginning of my
generated lexer routine mTokens, the code switches on LA(1) to build a
token, but LA(1) returns the lowercase input character. As you can see
below, I call setUcaseLA immediately after the input stream is created.
#include <windows.h>
#include <stdio.h>
#include <string.h>
#include "CommandsLexer.h"      // Generated by ANTLR from Commands.g
#include "CommandsParser.h"     // Generated by ANTLR from Commands.g
void main (int Argc, char* Argv[])
{
DWORD status;
char* ptr;
char command [1024];
DWORD command_len;
pANTLR3_INPUT_STREAM input;
pANTLR3_COMMON_TOKEN_STREAM tstream;
pCommandsLexer lexer;
pCommandsParser parser;
CommandsParser_commands_return commands_ast;
pANTLR3_COMMON_TREE_NODE_STREAM nodes;
//pCommandsDumpDecl tree_parser;
//+
// Display our prompt and read a command string from the console
//-
while (TRUE)
{
printf ("DT> ");
//+
// Read the entire line
//-
if ((ptr = gets_s ((char *)command, sizeof (command))) != NULL)
{
command_len = strlen ((char*)command);
//+
// Only try to parse the input if there is something there
//-
if (command_len > 0)
{
//+
// Create the input stream
//-
if ((input = antlr3NewAsciiStringInPlaceStream
((pANTLR3_UINT8)&command, (ANTLR3_UINT64) command_len, NULL)) != 0)
{
//+
// Tell ANTLR to use upper-case when matching
tokens
//-
input->setUcaseLA (input, ANTLR3_TRUE);
//+
// Create a new instance of the lexer using
our input stream
//-
if ((lexer = CommandsLexerNew (input)) != 0)
{
//+
// Create the token stream
//-
if ((tstream =
antlr3CommonTokenStreamSourceNew (ANTLR3_SIZE_HINT, TOKENSOURCE(lexer))) !=
0)
{
//+
// Create a new instance of the
parser using our lexer
//-
if ((parser = CommandsParserNew
(tstream)) != 0)
{
//+
// Call the parser with the
start symbol
//-
commands_ast =
parser->commands (parser);
//+
// Check for errors parsing
the input
//-
if
(parser->pParser->rec->state->errorCount == 0)
{
//+
// The input was
parsed successfully. Use the Abstract Syntax Tree
// which contains a
linked list of nodes containing the tokens that
// were parsed
//-
nodes =
antlr3CommonTreeNodeStreamNewTree (commands_ast.tree, ANTLR3_SIZE_HINT);
printf ("Commands
tree: %s\n", commands_ast.tree->toStringTree (commands_ast.tree)->chars);
// tree_parser =
CommandsDumpDeclNew (nodes);
// tree_parser->decl
(tree_parser);
// nodes->free (nodes);
// tree_parser->free
(tree_parser);
}
else
{
printf ("Errors found
during parsing: %d\n", parser->pParser->rec->state->errorCount);
}
//+
// We're now done with these
instances, so free them
//-
parser->free (parser);
tstream->free (tstream);
lexer->free (lexer);
input->close (input);
}
else
{
status = GetLastError ();
printf ("Error creating
parser, status = %08x\n", status);
break;
}
}
else
{
status = GetLastError ();
printf ("Unable to create token
stream, status = %08x\n", status);
break;
}
}
else
{
status = GetLastError ();
printf ("Unable to create lexer, status
= %08x\n", status);
break;
}
}
else
{
status = GetLastError ();
printf ("Error creating the input stream,
status = %08x\n", status);
break;
}
}
}
} // End while
}
//
// This grammar defines the commands available to the DiskTool (DT) program
//
grammar Commands;

options
{
    output       = AST;
    ASTLabelType = pANTLR3_BASE_TREE;
    language     = C;
    backtrack    = true;
    memoize      = true;
}

//+
// Do NOT define ANTLR3_INLINE_INPUT_ASCII in @lexer::header for this grammar.
// With that macro the generated lexer reads characters directly from the
// input buffer instead of calling the input stream's LA function, so the
// upper-casing that the driver enables with setUcaseLA is bypassed and the
// upper-case token literals below never match lower-case input.
//-
//+
// Parser rules
//-

// Start rule: zero or more script, dump or show commands
commands
    : ( script_command
      | dump_command
      | show_command
      )*
    ;

// '@' followed by a quoted file name
script_command
    : '@' WIN_FILE_NAME
    ;

// DUMP followed by a structure, a block range or a file
dump_command
    : DUMP
      ( dump_struct
      | dump_block
      | a_file
      )
    ;

// SHOW followed by a structure noun, a storage noun or a file
show_command
    : SHOW
      ( structure_nouns
      | storage_nouns
      | a_file
      )
    ;

mbr_vbr
    : MBR
    | VBR
    ;

block_nouns
    : LBN
    | LCN
    | VBN
    | VCN
    ;

structure_nouns
    : MBR
    | VBR
    ;

// Block noun and number, optionally followed by ",number" or ":number",
// then an optional drive name
dump_block
    : block_nouns number
      ( ( ',' number )
      | ( ':' number )
      )?
      DRIVE_NAME?
    ;

// MBR or VBR with an optional "/qualifier" and an optional drive name
dump_struct
    : mbr_vbr
      ( '/' qualifier )?
      DRIVE_NAME?
    ;

storage_nouns
    : DISK
    | VOLUME
    ;

// FILE followed by a quoted file name
a_file
    : FILE WIN_FILE_NAME
    ;

number
    : DEC_NUMBER
    | HEX_NUMBER
    ;

qualifier
    : ALL
    | CODE
    | TABLE
    ;
//+
// Lexer rules
//
// Every keyword literal is spelled in upper case only.
//-

// Verbs
DUMP    : 'DUMP' ;
SHOW    : 'SHOW' ;

// Nouns
// NOTE(review): PBN is not referenced by any parser rule shown in this
// grammar - confirm whether block_nouns should include it.
DISK    : 'DISK' ;
FILE    : 'FILE' ;
LBN     : 'LBN' ;
LCN     : 'LCN' ;
MBR     : 'MBR' ;
PBN     : 'PBN' ;
VBN     : 'VBN' ;
VBR     : 'VBR' ;
VCN     : 'VCN' ;
VOLUME  : 'VOLUME' ;

// Qualifiers
ALL     : 'ALL' ;
CODE    : 'CODE' ;
TABLE   : 'TABLE' ;

// Miscellaneous tokens

// A single letter followed by a colon
DRIVE_NAME
    : LETTER ':'
    ;

fragment
LETTER
    : 'A'..'Z'
    ;

fragment
DIGIT
    : '0'..'9'
    ;

fragment
HEX_DIGIT
    : (DIGIT | 'A'..'F')
    ;

// '0X' prefix followed by one or more hex digits
HEX_NUMBER
    : '0X' HEX_DIGIT+
    ;

DEC_NUMBER
    : DIGIT+
    ;

// Double-quoted name; the listed characters and line breaks are not allowed
WIN_FILE_NAME
    : '"' ~('|' | '<' | '>' | '*' | '?' | '\r' | '\n' | '"')+ '"'
    ;

// '!' starts a comment that runs to the end of the line (or end of input);
// it is sent to the hidden channel
LINE_COMMENT
    : '!' ~('\n'|'\r')* (('\r'? '\n') | EOF) {$channel=HIDDEN;}
    ;

// Whitespace is sent to the hidden channel
WS
    : (' ' | '\t' | '\r' | '\n')+ {$channel=HIDDEN;}
    ;
More information about the antlr-interest mailing list