[antlr-interest] Still having problems with the lexer code
johnclarke72
johnclarke at hotmail.com
Thu May 9 14:45:05 PDT 2002
A number of people have offered me advice regarding this problem but
so far I have not been able to solve it.
When I compile and run the application I then enter <!-- test --> and
expect to see :
HTML Comment : <!-- test --> on the screen. But all I see is :
line 1: unexpected token: <!-
exception: antlr.TokenStreamRecognitionException: unexpected char: -
I cannot see what is causing the problem. It is probably something
very simple that I have missed out. I would be grateful for any
advice offered.
Best Wishes
John
The Grammar for the Text Lexer
==============================
// Import the Required Classes
header
{
import java.util.*;
import antlr.*;
}
// The Class
class TextLexer extends Lexer;
// Set the Options for the Lexer
options
{
k=3; // Set the Look Ahead to 3
Characters
charVocabulary = '\1' .. '\377'; // Set the Lexer Character
Vocabulary
testLiterals = false; // Don't test against the
Literals table
}
// The routine that will allow us to switch between Selectors
{
// The current Selector
TokenStreamSelector selector;
// The method that will enable us to switch between Selectors
public void setSelector(TokenStreamSelector tokenStreamSelector)
{
selector = tokenStreamSelector;
}
}
HTMLCOMMENT : "<!-" {selector.select("HTMLTagLexer");};
// TEXT
WORD : ( ~ (' '|'\r'|'\n'|'\t'|'<') ) +;
// Ignore all White Space
WS : ( ' '
| '\t'
| '\r' '\n' { newline(); }
| '\n' { newline(); }
)
{$setType(Token.SKIP);} //ignore this token
;
The Grammar for the Tag Lexer
=============================
// Import the Required Classes
header
{
import java.util.*;
import antlr.*;
}
// The Class
class HTMLTagLexer extends Lexer;
// Set the Options for the Lexer
options
{
k=3; // Set the Look Ahead to 3
Characters
charVocabulary = '\1' .. '\377'; // Set the Lexer Character
Vocabulary
testLiterals = false; // Don't test against the
Literals table
importVocab = Tagged; // The Vocabulary to import
exportVocab = HTMLTags; // Export the Vocabulary to
HTMLTags
}
// The routine that will allow us to switch between Selectors
{
// The current Selector
TokenStreamSelector selector;
// The method that will enable us to switch between Selectors
public void setSelector(TokenStreamSelector tokenStreamSelector)
{
selector = tokenStreamSelector;
}
}
// HTML Comment Definition
HTMLCOMMENT : "<!--" (options { greedy=false; }: .) * "-->";
// Ignore all White Space
WS : ( ' '
| '\t'
| '\r' '\n' { newline(); }
| '\n' { newline(); }
)
{$setType(Token.SKIP);} //ignore this token
;
The Grammar for the Parser
==========================
// Import the Required Classes
header
{
import java.util.*;
import antlr.*;
}
// The Class
class HTMLParser extends Parser;
// Set the Options for the Parser
options
{
importVocab = Tagged; // The Vocabulary to import
}
// Define the starting point for processing the HTML
processData :
(
text:WORD {System.out.println("TEXT " + text.getText());}
| comment:HTMLComment {System.out.println("HTML Comment " +
comment.getText());}
)+;
The Java Application
====================
import java.io.*;
import antlr.*;
// The HTMLParserApp Class
class HTMLParserApp
{
// The Main function
public static void main(String[] args)
{
try
{
// Create the required Lexers
HTMLTagLexer htmlTagLexer = new HTMLTagLexer(new
DataInputStream(System.in));
TextLexer textLexer = new TextLexer
(htmlTagLexer.getInputState());
// Create the TokenStreamSelector and add the required
Lexers to it
TokenStreamSelector tokenStreamSelector = new
TokenStreamSelector();
tokenStreamSelector.addInputStream
(htmlTagLexer,"HTMLTagLexer");
tokenStreamSelector.addInputStream(textLexer,"TextLexer");
// Select the starting Lexer
tokenStreamSelector.select("TextLexer");
// Add the TokenStreamSelector to the Required Lexers
htmlTagLexer.setSelector(tokenStreamSelector);
textLexer.setSelector(tokenStreamSelector);
// Create the HTML Parser
HTMLParser htmlParser = new HTMLParser(tokenStreamSelector);
// Process the HTML
htmlParser.processData();
} catch(Exception e)
{
System.err.println("exception: "+e);
}
}
}
Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/
More information about the antlr-interest
mailing list