[antlr-interest] Re: A question regarding Token Stream Multiplexing (aka "Lexer states")

Mon May 6 07:43:37 PDT 2002

In your main lexer you match "<!-" and then switch to HTMLTagLexer.  So what
is left to match is "- test -->".  It then tries to match that first '-'
against either the HTMLCOMMENT or WS rule, but neither matches, so it complains
that it doesn't know what to do.  HTMLCOMMENT should probably be something
like

HTMLCOMMENT :  (options { greedy=false; }: .) * "-->" {selector.pop();};

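Note that selector.pop() only returns to the previous lexer if the main lexer
switched with selector.push("HTMLTagLexer") rather than
selector.select("HTMLTagLexer"); select() does not save the current stream.

And why do you have a WS rule in the HTMLTagLexer?  Do you need it there?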

Monty

> -----Original Message-----
> From: johnclarke72 [mailto:johnclarke at hotmail.com]
> Sent: Monday, May 06, 2002 7:15 AM
> To: antlr-interest at yahoogroups.com
> Subject: [antlr-interest] Re: A question regarding Token Stream
> Multiplexing (aka "Lexer states")
> 
> 
> When I compile and run the application I then enter <!-- test --> and 
> expect to see :
> HTML Comment : <!-- test --> on the screen.  But all I see is :
> 
> line 1: unexpected token: <!-
> exception: antlr.TokenStreamRecognitionException: unexpected char: -
> 
> I cannot see what is causing the problem.  It is probably something 
> very simple that I have missed out.  I would be grateful for any 
> advice offered.
> 
> Best Wishes
> 
> John
> 
> The Grammar for the Text Lexer
> ==============================
> 
> // Import the Required Classes
> header
> {
>    import java.util.*;
>    import antlr.*;
> }
> 
> // The Class
> class TextLexer extends Lexer;
> 
> // Set the Options for the Lexer
> options
> {
>   k=3;                                  // Set the Look Ahead to 3 
> Characters
>   charVocabulary = '\1' .. '\377';      // Set the Lexer Character 
> Vocabulary
>   testLiterals = false;                 // Don't test against the 
> Literals table
> }
> 
> // The routine that will allow us to switch between Selectors
> {
>     // The current Selector
>     TokenStreamSelector selector;
> 
>     // The method that will enable us to switch between Selectors
>     public void setSelector(TokenStreamSelector tokenStreamSelector)
>     {
>         selector = tokenStreamSelector;
>     }
> 
> }
> 
> HTMLCOMMENT : "<!-" {selector.select("HTMLTagLexer");};
> 
> // TEXT
> WORD : ( ~ (' '|'\r'|'\n'|'\t'|'<') ) +;
> 
> // Ignore all White Space
> WS      :       (       ' '
>                 |       '\t'
> 		|	'\r' '\n' { newline(); }
> 		|	'\n' { newline(); }
> 		)
> 		{$setType(Token.SKIP);}	//ignore this token
> 	;
> 
> The Grammar for the Tag Lexer
> =============================
> // Import the Required Classes
> header
> {
>    import java.util.*;
>    import antlr.*;
> }
> 
> // The Class
> class HTMLTagLexer extends Lexer;
> 
> // Set the Options for the Lexer
> options
> {
>   k=3;                                  // Set the Look Ahead to 3 
> Characters
>   charVocabulary = '\1' .. '\377';      // Set the Lexer Character 
> Vocabulary
>   testLiterals = false;                 // Don't test against the 
> Literals table
>   importVocab = Tagged;                 // The Vocabulary to import
>   exportVocab = HTMLTags;               // Export the Vocabulary to 
> HTMLTags
> }
> 
> // The routine that will allow us to switch between Selectors
> {
>     // The current Selector
>     TokenStreamSelector selector;
> 
>     // The method that will enable us to switch between Selectors
>     public void setSelector(TokenStreamSelector tokenStreamSelector)
>     {
>         selector = tokenStreamSelector;
>     }
> 
> }
> 
> // HTML Comment Definition
> HTMLCOMMENT : "<!--" (options { greedy=false; }: .) * "-->";
> 
> // Ignore all White Space
> WS      :       (       ' '
>                 |       '\t'
> 		|	'\r' '\n' { newline(); }
> 		|	'\n' { newline(); }
> 		)
> 		{$setType(Token.SKIP);}	//ignore this token
> 	;
> 
> The Grammar for the Parser
> ==========================
> 
> // Import the Required Classes
> header
> {
>    import java.util.*;
>    import antlr.*;
> }
> 
> // The Class
> class HTMLParser extends Parser;
> 
> // Set the Options for the Parser
> options
> {
>   importVocab = Tagged;                 // The Vocabulary to import
> }
> 
> // Define the starting point for processing the HTML
> processData :
> (
>  text:WORD {System.out.println("TEXT " + text.getText());}
>  | comment:HTMLComment {System.out.println("HTML Comment " + 
> comment.getText());}
> )+;
> 
> The Java Application
> ====================
> 
> import java.io.*;
> import antlr.*;
> 
> // The HTMLParserApp Class
> class HTMLParserApp
> {
>  
>    // The Main function
>    public static void main(String[] args)
>    {
>       try
>       {
>          // Create the required Lexers
>          HTMLTagLexer htmlTagLexer = new HTMLTagLexer(new 
> DataInputStream(System.in));
>          TextLexer textLexer = new TextLexer
> (htmlTagLexer.getInputState());
> 
>          // Create the TokenStreamSelector and add the required 
> Lexers to it
>          TokenStreamSelector tokenStreamSelector = new 
> TokenStreamSelector();
>          tokenStreamSelector.addInputStream
> (htmlTagLexer,"HTMLTagLexer");
>          tokenStreamSelector.addInputStream(textLexer,"TextLexer");
> 
>          // Select the starting Lexer
>          tokenStreamSelector.select("TextLexer");
> 
>          // Add the TokenStreamSelector to the Required Lexers
>          htmlTagLexer.setSelector(tokenStreamSelector);
>          textLexer.setSelector(tokenStreamSelector);
> 
>          // Create the HTML Parser
>          HTMLParser htmlParser = new HTMLParser(tokenStreamSelector);
> 
>          // Process the HTML
>          htmlParser.processData();
>         
>       } catch(Exception e)
>         {
>           System.err.println("exception: "+e);
>         }
>     }
> }
> 
> 
> 
>  
> 
> Your use of Yahoo! Groups is subject to 
> http://docs.yahoo.com/info/terms/ 
> 
> 
> 

Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/