[antlr-interest] Changes for stream offset determination
Jim Crafton
jim.crafton at gmail.com
Wed Oct 26 16:53:12 PDT 2005
Here are the changes I made to allow a custom AST node to determine
the current offset of the lexer.
In CharScanner.hpp, class antlr::CharScanner
protected:
unsigned int offset_;
public:
/// Returns the current value of the scanner's offset_ counter.
unsigned int offset() const { return offset_; }
// Consume the current lookahead character from the input.
// When the lexer is not guessing, the character is first handed to append()
// and the offset_ and column bookkeeping are updated.
virtual void consume()
{
    const bool guessing = (inputState->guessing != 0);
    if (!guessing)
    {
        const int current = LA(1);

        // For a case-insensitive scanner, CharScanner.LA() would toLower the
        // character, so take the original-case one from input.LA() instead.
        if (!caseSensitive)
            append(inputState->getInput().LA(1));
        else
            append(current);

        // Stream offset tracking: one more character has been accepted.
        ++offset_;

        // RK: in a sense I don't like this automatic handling.
        if (current != '\t')
            inputState->column++;
        else
            tab();
    }
    inputState->getInput().consume();
}
I increment the offset_ member in the consume() method.
In CharScanner.cpp
// Construct a scanner over the input buffer `cb`, which is wrapped in a newly
// allocated LexerInputState. The stream offset counter starts at 0 and
// CommonToken::factory is installed as the token object factory.
CharScanner::CharScanner(InputBuffer& cb, bool case_sensitive )
    : saveConsumedInput(true) //, caseSensitiveLiterals(true)
    , offset_(0) // added: stream offset starts at 0
    , caseSensitive(case_sensitive)
    , literals(CharScannerLiteralsLess(this))
    , inputState(new LexerInputState(cb))
    , commitToPath(false)
    , tabsize(8)
    , traceDepth(0)
{
    setTokenObjectFactory(&CommonToken::factory);
}
// Construct a scanner over the input buffer pointed to by `cb`, which is
// passed to a newly allocated LexerInputState. The stream offset counter
// starts at 0 and CommonToken::factory is installed as the token object
// factory.
CharScanner::CharScanner(InputBuffer* cb, bool case_sensitive )
    : saveConsumedInput(true) //, caseSensitiveLiterals(true)
    , offset_(0) // added: stream offset starts at 0
    , caseSensitive(case_sensitive)
    , literals(CharScannerLiteralsLess(this))
    , inputState(new LexerInputState(cb))
    , commitToPath(false)
    , tabsize(8)
    , traceDepth(0)
{
    setTokenObjectFactory(&CommonToken::factory);
}
// Construct a scanner that uses the supplied LexerSharedInputState as its
// input state. The stream offset counter starts at 0 and
// CommonToken::factory is installed as the token object factory.
// NOTE(review): offset_ is a member of the scanner, not of the input state,
// so it always starts at 0 here regardless of `state` - confirm that is the
// intended behaviour when an input state is shared.
CharScanner::CharScanner( const LexerSharedInputState& state, bool case_sensitive )
    : saveConsumedInput(true) //, caseSensitiveLiterals(true)
    , offset_(0) // added: stream offset starts at 0
    , caseSensitive(case_sensitive)
    , literals(CharScannerLiteralsLess(this))
    , inputState(state)
    , commitToPath(false)
    , tabsize(8)
    , traceDepth(0)
{
    setTokenObjectFactory(&CommonToken::factory);
}
In Token.hpp, class antlr::Token
public:
/// Hook for recording a stream offset on a token.
/// This base implementation does nothing with the value.
virtual void setOffset( unsigned int offset ) {}
/// Stream offset of the token. This base implementation always reports 0.
virtual unsigned int getOffset() const { return 0; }
In CommonToken.hpp class antlr::CommonToken
protected:
unsigned int offset_;
public:
virtual void setOffset( unsigned int offset ) {
offset_ = offset;
}
virtual unsigned int getOffset() const {
return offset_;
}
In CommonToken.cpp
// Default token: line 1, column 1, offset 0, empty text.
CommonToken::CommonToken()
    : Token()
    , line(1)
    , col(1)
    , offset_(0)
    , text("")
{
}
// Token with type `t` and text `txt`; line 1, column 1, offset 0.
CommonToken::CommonToken(int t, const ANTLR_USE_NAMESPACE(std)string& txt)
    : Token(t),
      line(1),
      col(1),
      offset_(0),
      text(txt)
{
}
// Token carrying text `s` with a default-constructed Token base;
// line 1, column 1, offset 0.
CommonToken::CommonToken(const ANTLR_USE_NAMESPACE(std)string& s)
    : Token(),
      line(1),
      col(1),
      offset_(0),
      text(s)
{
}
Note that the offset_ member is initialized to 0.
Then in my custom AST class I do something like this:
/**
 * AST node that records the position of the token it was built from:
 * line, column and character offset.
 *
 * The node's offset is the token's getOffset() minus the length of the
 * token text. Presumably the token stores the lexer offset reached after
 * its last character - confirm against the place where setOffset() is
 * called on the token.
 */
class CppASTNode : public CommonAST {
public:
    /// Creates a node with line, column and offset all set to 0.
    CppASTNode(): line_(0), column_(0), offset_(0) {}

    /// Creates a node taking type, text and position from token `t`.
    CppASTNode( antlr::RefToken t ): line_(0), column_(0), offset_(0) {
        CommonAST::setType(t->getType() );
        CommonAST::setText(t->getText() );
        setPositionFrom(t);
    }

    /// Re-initializes the node from a bare type and text. There is no token
    /// to take a position from, so line, column and offset are all reset.
    void initialize(int t, const std::string& txt) {
        CommonAST::setType(t);
        CommonAST::setText(txt);
        line_ = 0; // to be noticed !
        column_ = 0;
        offset_ = 0; // keep the offset consistent with line_/column_
    }

    /// Copies type, text and position from another CppASTNode.
    void initialize( RefCppASTNode t ) {
        CommonAST::setType(t->getType() );
        CommonAST::setText(t->getText() );
        line_ = t->line_;
        column_ = t->column_;
        offset_ = t->offset_;
    }

    /// Delegates to CommonAST::initialize; the position members are left
    /// untouched.
    void initialize( RefAST t ) {
        CommonAST::initialize(t);
    }

    /// Re-initializes the node from token `t`, including its position.
    void initialize( antlr::RefToken t ) {
        CommonAST::initialize(t);
        setPositionFrom(t);
    }

    /// Forwards to CommonAST::setText.
    void setText(const std::string& txt) {
        CommonAST::setText(txt);
    }

    /// Forwards to CommonAST::setType.
    void setType(int type) {
        CommonAST::setType(type);
    }

    /// Factory function: returns a new default-constructed CppASTNode
    /// wrapped as an antlr::RefAST.
    static antlr::RefAST factory( void ) {
        antlr::RefAST ret =
            static_cast<antlr::RefAST>(RefCppASTNode(new CppASTNode()));
        return ret;
    }

    /// Line recorded for this node (0 when none was recorded).
    int getLineNumber() const {
        return line_;
    }

    /// Column recorded for this node (0 when none was recorded).
    int getColumnNumber() const {
        return column_;
    }

    /// Offset recorded for this node (0 when none was recorded).
    unsigned int getOffset() const {
        return offset_;
    }

protected:
    /// Copies line, column and offset from token `t`.
    void setPositionFrom( antlr::RefToken t ) {
        line_ = t->getLine();
        column_ = t->getColumn();
        offset_ = startOffset( t->getOffset(), t->getText().size() );
    }

    /// Returns tokenOffset - textLength, clamped to 0.
    /// The subtraction is unsigned, so without the clamp a token whose
    /// offset was never set (getOffset() == 0) would wrap around to a huge
    /// value instead of yielding 0.
    static unsigned int startOffset( unsigned int tokenOffset,
                                     std::string::size_type textLength ) {
        if ( textLength > tokenOffset )
            return 0;
        return tokenOffset - static_cast<unsigned int>( textLength );
    }

    int line_;
    int column_;
    unsigned int offset_;
};
Cheers, and hope this proves useful to others.
Jim
More information about the antlr-interest
mailing list