[antlr-interest] Lexer error

Brian Catlin BrianC at sannas.org
Wed Apr 14 00:57:02 PDT 2010


The following grammar compiles without any warnings or errors, and
ANTLRWorks doesn't complain either, but when I call the parser, the lexer
reports an error for almost every character in the string to be parsed (see
the output below).  I know it has something to do with the FILE_NAME rule,
but I don't know how to fix it.  I suspect that the lexer cannot create a
token because the FILE_NAME rule could also match any other token (a file
name on Windows can contain just about any character).  I've structured my
grammar so that the FILE_NAME is always the last token on a line, so I
assumed ANTLR would be able to work it out from that context, but that
doesn't appear to be the case.  So, how can I describe this to ANTLR?

 

Any help would be greatly appreciated!

 

-Brian

 

 

DT> dump mbr

-memory-(1) : lexer error 3 :

         at offset 0, near 'D' :

        dump mbr

-memory-(1) : lexer error 3 :

         at offset 1, near 'U' :

        ump mbr

-memory-(1) : lexer error 3 :

         at offset 2, near 'M' :

        mp mbr

-memory-(1) : lexer error 3 :

         at offset 3, near 'P' :

        p mbr

-memory-(1) : lexer error 3 :

         at offset 5, near 'M' :

        mbr

-memory-(1) : lexer error 3 :

         at offset 6, near 'B' :

        br

-memory-(1) : lexer error 3 :

         at offset 7, near 'R' :

        r

 

//

// This grammar defines the commands available to the DiskTool (DT) program

//

 

grammar Commands;

 

options 

      {

      output = AST;

      ASTLabelType = pANTLR3_BASE_TREE;

      language = C;

      backtrack = true;

      memoize = true;

      }

 

@lexer::header
{
// ANTLR3_INLINE_INPUT_ASCII is deliberately NOT defined here.  That macro
// makes the generated lexer read characters straight out of the input
// buffer instead of calling the stream's LA function.  The driver program
// installs an upper-casing LA with input->setUcaseLA() so that lower-case
// input such as "dump mbr" matches the upper-case keyword literals; with
// the inline macros that override is bypassed and every lower-case letter
// is reported as a lexer error.
}

 

//+
// Parser rules
//-

// Start rule: any number of script, dump and show commands, in any order.
commands
    : ( script_command | dump_command | show_command )*
    ;

// '@' followed by a FILE_NAME token.
script_command
    : '@' FILE_NAME
    ;

// DUMP followed by a structure, a block address or a file.
dump_command
    : DUMP ( dump_struct | dump_block | a_file )
    ;

// SHOW followed by a structure noun, a storage noun or a file.
show_command
    : SHOW ( structure_nouns | storage_nouns | a_file )
    ;

// Structures that dump_struct accepts.
mbr_vbr
    : MBR
    | VBR
    ;

// Block-addressing keywords that dump_block accepts.
block_nouns
    : LBN
    | LCN
    | VBN
    | VCN
    ;

// Structures that show_command accepts (same alternatives as mbr_vbr).
structure_nouns
    : MBR
    | VBR
    ;

// A block noun and a number, optionally followed by ",number" or ":number",
// and then an optional drive name.
dump_block
    : block_nouns number ( ',' number | ':' number )? DRIVE_NAME?
    ;

// MBR or VBR, an optional "/qualifier", and an optional drive name.
dump_struct
    : mbr_vbr ( '/' qualifier )? DRIVE_NAME?
    ;

// Storage objects that show_command accepts.
storage_nouns
    : DISK
    | VOLUME
    ;

// The FILE keyword followed by a FILE_NAME token.
a_file
    : FILE FILE_NAME
    ;

// A decimal or hexadecimal literal.
number
    : DEC_NUMBER
    | HEX_NUMBER
    ;

// Values allowed after the '/' in dump_struct.
qualifier
    : ALL
    | CODE
    | TABLE
    ;

 

//+
// Lexer rules
//
// Every keyword literal is written in upper case.
//-

// Verbs

DUMP    : 'DUMP' ;
SHOW    : 'SHOW' ;

// Nouns

DISK    : 'DISK' ;
FILE    : 'FILE' ;
LBN     : 'LBN' ;
LCN     : 'LCN' ;
MBR     : 'MBR' ;
PBN     : 'PBN' ;
VBN     : 'VBN' ;
VBR     : 'VBR' ;
VCN     : 'VCN' ;
VOLUME  : 'VOLUME' ;

// Qualifiers

ALL     : 'ALL' ;
CODE    : 'CODE' ;
TABLE   : 'TABLE' ;

// Miscellaneous tokens

// A single drive letter followed by a colon.
DRIVE_NAME
    : LETTER ':'
    ;

fragment
LETTER
    : 'A'..'Z'
    ;

fragment
DIGIT
    : '0'..'9'
    ;

fragment
HEX_DIGIT
    : DIGIT
    | 'A'..'F'
    ;

// '0X' followed by one or more hexadecimal digits.
HEX_NUMBER
    : '0X' HEX_DIGIT+
    ;

// One or more decimal digits.
DEC_NUMBER
    : DIGIT+
    ;

// One or more characters other than | < > * ? CR LF, ended by a line break
// or by end of input.
//
// NOTE(review): this rule is a `fragment`, yet script_command and a_file use
// FILE_NAME as a token.  In ANTLR 3 a fragment rule is only invoked from
// other lexer rules and is never emitted as a token by itself, so the parser
// presumably can never receive a FILE_NAME as written -- confirm.  Simply
// removing `fragment` is not a fix: the rule would then also match whole
// command lines such as "DUMP MBR".
fragment
FILE_NAME
    : ~( '|' | '<' | '>' | '*' | '?' | '\r' | '\n' )+ ( '\r'? '\n' | EOF )
    ;

// '!' starts a comment that runs to the end of the line; sent to the hidden
// channel.
LINE_COMMENT
    : '!' ~( '\n' | '\r' )* ( '\r'? '\n' | EOF ) {$channel=HIDDEN;}
    ;

// Runs of blanks, tabs and line breaks; sent to the hidden channel.
WS
    : ( ' ' | '\t' | '\r' | '\n' )+ {$channel=HIDDEN;}
    ;

 

 

 

#include <windows.h>

#include <stdio.h>
#include <string.h>

#include "CommandsLexer.h"      // Generated by ANTLR from Commands.g
#include "CommandsParser.h"     // Generated by ANTLR from Commands.g

 

 

 

void main (int Argc, char* Argv[])

{

DWORD                                     status;

char*                                     ptr;

char                                      command [1024];

DWORD                                     command_len;

pANTLR3_INPUT_STREAM                input;

pANTLR3_COMMON_TOKEN_STREAM         tstream;

pCommandsLexer                            lexer;

pCommandsParser                           parser;

CommandsParser_commands_return      commands_ast;

pANTLR3_COMMON_TREE_NODE_STREAM     nodes;

//pCommandsDumpDecl                       tree_parser;

 

 

      //+

      // Display our prompt and read a command string from the console

      //-

 

      while (TRUE)

            {

            printf ("DT> ");

            

            //+

            // Read the entire line

            //-

 

            if ((ptr = gets_s ((char *)command, sizeof (command))) != NULL)

                  {

                  command_len = strlen ((char*)command);

 

                  //+

                  // Only try to parse the input if there is something there

                  //-

 

                  if (command_len > 0)

                        {

 

                        //+

                        // Create the input stream

                        //-

 

                        if ((input = antlr3NewAsciiStringInPlaceStream
((pANTLR3_UINT8)&command, (ANTLR3_UINT64) command_len, NULL)) != 0)

                              {

 

                              //+

                              // Tell ANTLR to use upper-case when matching
tokens

                              //-

 

                              input->setUcaseLA (input, ANTLR3_TRUE);

 

                              //+

                              // Create a new instance of the lexer using
our input stream

                              //-

 

                              if ((lexer = CommandsLexerNew (input)) != 0)

                                    {

 

                                    //+

                                    // Create the token stream

                                    //-

 

                                    if ((tstream =
antlr3CommonTokenStreamSourceNew (ANTLR3_SIZE_HINT, TOKENSOURCE(lexer))) !=
0)

                                          {

 

                                          //+

                                          // Create a new instance of the
parser using our lexer

                                          //-

 

                                          if ((parser = CommandsParserNew
(tstream)) != 0)

                                                {

 

                                                //+

                                                // Call the parser with the
start symbol

                                                //-

 

                                                commands_ast =
parser->commands (parser); 

 

                                                //+

                                                // Check for errors parsing
the input

                                                //-

 

                                                if
(parser->pParser->rec->state->errorCount == 0)

                                                      {

 

                                                      //+

                                                      // The input was
parsed successfully.  Use the Abstract Syntax Tree

                                                      // which contains a
linked list of nodes containing the tokens that

                                                      // were parsed

                                                      //-

 

                                                      nodes =
antlr3CommonTreeNodeStreamNewTree (commands_ast.tree, ANTLR3_SIZE_HINT);

                                                      printf ("Commands
tree: %s\n", commands_ast.tree->toStringTree (commands_ast.tree)->chars);

//                                                    tree_parser =
CommandsDumpDeclNew (nodes);

 

//                                                    tree_parser->decl
(tree_parser);

//                                                    nodes->free (nodes);

//                                                    tree_parser->free
(tree_parser);

                                                      }

                                                else

                                                      {

                                                      printf ("Errors found
during parsing: %d\n", parser->pParser->rec->state->errorCount);

                                                      }

 

                                                //+

                                                // We're now done with these
instances, so free them

                                                //-

 

                                                parser->free (parser);

                                                tstream->free (tstream);

                                                lexer->free (lexer);

                                                input->close (input);

                                                }

                                          else

                                                {

                                                status = GetLastError ();

                                                printf ("Error creating
parser, status = %08x\n", status);

                                                break;

                                                }

 

                                          }

                                    else

                                          {

                                          status = GetLastError ();

                                          printf ("Unable to create token
stream, status = %08x\n", status);

                                          break;

                                          }

 

                                    }

                              else

                                    {

                                    status = GetLastError ();

                                    printf ("Unable to create lexer, status
= %08x\n", status);

                                    break;

                                    }

 

                              }

                        else

                              {

                              status = GetLastError ();

                              printf ("Error creating the input stream,
status = %08x\n", status);

                              break;

                              }

 

                        }

 

                  }

 

 

            }     // End while

 

}

 



More information about the antlr-interest mailing list