[antlr-interest] Still having problems with the lexer code

johnclarke72 johnclarke at hotmail.com
Thu May 9 14:45:05 PDT 2002


A number of people have offered me advice regarding this problem but 
so far I have not been able to solve it.

When I compile and run the application I then enter <!-- test --> and 
expect to see :
HTML Comment : <!-- test --> on the screen. But all I see is :

line 1: unexpected token: <!-
exception: antlr.TokenStreamRecognitionException: unexpected char: -

I cannot see what is causing the problem. It is probably something 
very simple that I have missed out. I would be grateful for any 
advice offered.

Best Wishes

John

The Grammar for the Text Lexer
==============================

// Import the Required Classes
header
{
import java.util.*;
import antlr.*;
}

// The Class
class TextLexer extends Lexer;

// Set the Options for the Lexer
options
{
k=3; // Set the Look Ahead to 3 
Characters
charVocabulary = '\1' .. '\377'; // Set the Lexer Character 
Vocabulary
testLiterals = false; // Don't test against the 
Literals table
}

// The routine that will allow us to switch between Selectors
{
// The current Selector
TokenStreamSelector selector;

// The method that will enable us to switch between Selectors
public void setSelector(TokenStreamSelector tokenStreamSelector)
{
selector = tokenStreamSelector;
}

}

HTMLCOMMENT : "<!-" {selector.select("HTMLTagLexer");};

// TEXT
WORD : ( ~ (' '|'\r'|'\n'|'\t'|'<') ) +;

// Ignore all White Space
WS : ( ' '
| '\t'
| '\r' '\n' { newline(); }
| '\n' { newline(); }
)
{$setType(Token.SKIP);} //ignore this token
;

The Grammar for the Tag Lexer
=============================
// Import the Required Classes
header
{
import java.util.*;
import antlr.*;
}

// The Class
class HTMLTagLexer extends Lexer;

// Set the Options for the Lexer
options
{
k=3; // Set the Look Ahead to 3 
Characters
charVocabulary = '\1' .. '\377'; // Set the Lexer Character 
Vocabulary
testLiterals = false; // Don't test against the 
Literals table
importVocab = Tagged; // The Vocabulary to import
exportVocab = HTMLTags; // Export the Vocabulary to 
HTMLTags
}

// The routine that will allow us to switch between Selectors
{
// The current Selector
TokenStreamSelector selector;

// The method that will enable us to switch between Selectors
public void setSelector(TokenStreamSelector tokenStreamSelector)
{
selector = tokenStreamSelector;
}

}

// HTML Comment Definition
HTMLCOMMENT : "<!--" (options { greedy=false; }: .) * "-->";

// Ignore all White Space
WS : ( ' '
| '\t'
| '\r' '\n' { newline(); }
| '\n' { newline(); }
)
{$setType(Token.SKIP);} //ignore this token
;

The Grammar for the Parser
==========================

// Import the Required Classes
header
{
import java.util.*;
import antlr.*;
}

// The Class
class HTMLParser extends Parser;

// Set the Options for the Parser
options
{
importVocab = Tagged; // The Vocabulary to import
}

// Define the starting point for processing the HTML
processData :
(
text:WORD {System.out.println("TEXT " + text.getText());}
| comment:HTMLComment {System.out.println("HTML Comment " + 
comment.getText());}
)+;

The Java Application
====================

import java.io.*;
import antlr.*;

// The HTMLParserApp Class
class HTMLParserApp
{

// The Main function
public static void main(String[] args)
{
try
{
// Create the required Lexers
HTMLTagLexer htmlTagLexer = new HTMLTagLexer(new 
DataInputStream(System.in));
TextLexer textLexer = new TextLexer
(htmlTagLexer.getInputState());

// Create the TokenStreamSelector and add the required 
Lexers to it
TokenStreamSelector tokenStreamSelector = new 
TokenStreamSelector();
tokenStreamSelector.addInputStream
(htmlTagLexer,"HTMLTagLexer");
tokenStreamSelector.addInputStream(textLexer,"TextLexer");

// Select the starting Lexer
tokenStreamSelector.select("TextLexer");

// Add the TokenStreamSelector to the Required Lexers
htmlTagLexer.setSelector(tokenStreamSelector);
textLexer.setSelector(tokenStreamSelector);

// Create the HTML Parser
HTMLParser htmlParser = new HTMLParser(tokenStreamSelector);

// Process the HTML
htmlParser.processData();

} catch(Exception e)
{
System.err.println("exception: "+e);
}
}
}



 

Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ 



More information about the antlr-interest mailing list