[antlr-interest] Re: A question regarding Token Stream Multiplexing (aka "Lexer states")

johnclarke72 johnclarke at hotmail.com
Mon May 6 07:14:55 PDT 2002


When I compile and run the application I then enter <!-- test --> and 
expect to see :
HTML Comment : <!-- test --> on the screen.  But all I see is :

line 1: unexpected token: <!-
exception: antlr.TokenStreamRecognitionException: unexpected char: -

I cannot see what is causing the problem.  It is probably something 
very simple that I have missed out.  I would be grateful for any 
advice offered.

Best Wishes

John

The Grammar for the Text Lexer
==============================

// Import the Required Classes
header
{
   import java.util.*;
   import antlr.*;
}

// The Class
class TextLexer extends Lexer;

// Set the Options for the Lexer
options
{
  k=3;                                  // Set the Look Ahead to 3 
Characters
  charVocabulary = '\1' .. '\377';      // Set the Lexer Character 
Vocabulary
  testLiterals = false;                 // Don't test against the 
Literals table
}

// The routine that will allow us to switch between Selectors
{
    // The current Selector
    TokenStreamSelector selector;

    // The method that will enable us to switch between Selectors
    public void setSelector(TokenStreamSelector tokenStreamSelector)
    {
        selector = tokenStreamSelector;
    }

}

HTMLCOMMENT : "<!-" {selector.select("HTMLTagLexer");};

// TEXT
WORD : ( ~ (' '|'\r'|'\n'|'\t'|'<') ) +;

// Ignore all White Space
WS      :       (       ' '
                |       '\t'
		|	'\r' '\n' { newline(); }
		|	'\n' { newline(); }
		)
		{$setType(Token.SKIP);}	//ignore this token
	;

The Grammar for the Tag Lexer
=============================
// Import the Required Classes
header
{
   import java.util.*;
   import antlr.*;
}

// The Class
class HTMLTagLexer extends Lexer;

// Set the Options for the Lexer
options
{
  k=3;                                  // Set the Look Ahead to 3 
Characters
  charVocabulary = '\1' .. '\377';      // Set the Lexer Character 
Vocabulary
  testLiterals = false;                 // Don't test against the 
Literals table
  importVocab = Tagged;                 // The Vocabulary to import
  exportVocab = HTMLTags;               // Export the Vocabulary to 
HTMLTags
}

// The routine that will allow us to switch between Selectors
{
    // The current Selector
    TokenStreamSelector selector;

    // The method that will enable us to switch between Selectors
    public void setSelector(TokenStreamSelector tokenStreamSelector)
    {
        selector = tokenStreamSelector;
    }

}

// HTML Comment Definition
HTMLCOMMENT : "<!--" (options { greedy=false; }: .) * "-->";

// Ignore all White Space
WS      :       (       ' '
                |       '\t'
		|	'\r' '\n' { newline(); }
		|	'\n' { newline(); }
		)
		{$setType(Token.SKIP);}	//ignore this token
	;

The Grammar for the Parser
==========================

// Import the Required Classes
header
{
   import java.util.*;
   import antlr.*;
}

// The Class
class HTMLParser extends Parser;

// Set the Options for the Parser
options
{
  importVocab = Tagged;                 // The Vocabulary to import
}

// Define the starting point for processing the HTML
processData :
(
 text:WORD {System.out.println("TEXT " + text.getText());}
 | comment:HTMLComment {System.out.println("HTML Comment " + 
comment.getText());}
)+;

The Java Application
====================

import java.io.*;
import antlr.*;

// The HTMLParserApp Class
class HTMLParserApp
{
 
   // The Main function
   public static void main(String[] args)
   {
      try
      {
         // Create the required Lexers
         HTMLTagLexer htmlTagLexer = new HTMLTagLexer(new 
DataInputStream(System.in));
         TextLexer textLexer = new TextLexer
(htmlTagLexer.getInputState());

         // Create the TokenStreamSelector and add the required 
Lexers to it
         TokenStreamSelector tokenStreamSelector = new 
TokenStreamSelector();
         tokenStreamSelector.addInputStream
(htmlTagLexer,"HTMLTagLexer");
         tokenStreamSelector.addInputStream(textLexer,"TextLexer");

         // Select the starting Lexer
         tokenStreamSelector.select("TextLexer");

         // Add the TokenStreamSelector to the Required Lexers
         htmlTagLexer.setSelector(tokenStreamSelector);
         textLexer.setSelector(tokenStreamSelector);

         // Create the HTML Parser
         HTMLParser htmlParser = new HTMLParser(tokenStreamSelector);

         // Process the HTML
         htmlParser.processData();
        
      } catch(Exception e)
        {
          System.err.println("exception: "+e);
        }
    }
}



 

Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ 



More information about the antlr-interest mailing list