[antlr-interest] Changes for stream offset determination

Jim Crafton jim.crafton at gmail.com
Wed Oct 26 16:53:12 PDT 2005


Here are the changes I made to allow a custom AST node to determine
the current offset of the lexer.

In CharScanner.hpp, class antlr::CharScanner

protected:
// Running count of characters consumed by this scanner; consume()
// increments it once per character while inputState->guessing == 0.
unsigned int offset_;

public:
  // Returns the current value of offset_.
  unsigned int offset() const {
    return offset_;
  }


	// Consumes one character from the input. When the scanner is not
	// guessing it also appends the character to the token text, bumps the
	// running offset_, and updates the column (or calls tab() for '\t').
	virtual void consume()
	{
		// While guessing, only the underlying input advances; none of the
		// text/offset/column bookkeeping below is performed.
		if (inputState->guessing != 0)
		{
			inputState->getInput().consume();
			return;
		}

		const int ch = LA(1);
		if (!caseSensitive)
		{
			// use input.LA(), not LA(), to get original case
			// CharScanner.LA() would toLower it.
			append(inputState->getInput().LA(1));
		}
		else
		{
			append(ch);
		}

		// Added for offset tracking: one more character has been consumed.
		++offset_;

		// RK: in a sense I don't like this automatic handling.
		if (ch != '\t')
			inputState->column++;
		else
			tab();

		inputState->getInput().consume();
	}

The offset_ member is incremented in the consume() method, once for each
character consumed while the lexer is not guessing.


In CharScanner.cpp

// Constructs a scanner over the input buffer `cb`, wrapping it in a new
// LexerInputState, and installs CommonToken::factory as the token factory.
// offset_ starts at 0.
// NOTE(review): members are initialized in their declaration order, not in
// the order written here; confirm where offset_ is declared in the class if
// the compiler warns about initializer ordering.
CharScanner::CharScanner(InputBuffer& cb, bool case_sensitive )
	: saveConsumedInput(true) //, caseSensitiveLiterals(true)
	, offset_(0) // added: the offset starts at 0
	, caseSensitive(case_sensitive)
	, literals(CharScannerLiteralsLess(this))
	, inputState(new LexerInputState(cb))
	, commitToPath(false)
	, tabsize(8)
	, traceDepth(0)
{
	setTokenObjectFactory(&CommonToken::factory);
}

// Constructs a scanner over the input buffer pointed to by `cb`, wrapping it
// in a new LexerInputState, and installs CommonToken::factory as the token
// factory. offset_ starts at 0.
// NOTE(review): members are initialized in their declaration order, not in
// the order written here; confirm where offset_ is declared in the class if
// the compiler warns about initializer ordering.
CharScanner::CharScanner(InputBuffer* cb, bool case_sensitive )
	: saveConsumedInput(true) //, caseSensitiveLiterals(true)
	, offset_(0) // added: the offset starts at 0
	, caseSensitive(case_sensitive)
	, literals(CharScannerLiteralsLess(this))
	, inputState(new LexerInputState(cb))
	, commitToPath(false)
	, tabsize(8)
	, traceDepth(0)
{
	setTokenObjectFactory(&CommonToken::factory);
}

// Constructs a scanner that uses the supplied input state `state` instead of
// creating its own, and installs CommonToken::factory as the token factory.
// offset_ starts at 0.
// NOTE(review): offset_ lives in the scanner, not in the input state, so it
// appears that scanners constructed over the same state each keep their own
// count -- confirm this is acceptable for your use.
// NOTE(review): members are initialized in their declaration order, not in
// the order written here; confirm where offset_ is declared in the class if
// the compiler warns about initializer ordering.
CharScanner::CharScanner( const LexerSharedInputState& state, bool case_sensitive )
	: saveConsumedInput(true) //, caseSensitiveLiterals(true)
	, offset_(0) // added: the offset starts at 0
	, caseSensitive(case_sensitive)
	, literals(CharScannerLiteralsLess(this))
	, inputState(state)
	, commitToPath(false)
	, tabsize(8)
	, traceDepth(0)
{
	setTokenObjectFactory(&CommonToken::factory);
}


In Token.hpp, class antlr::Token

public:
// Base-class hook for recording a token's offset. This default
// implementation discards the value; CommonToken overrides it to store it.
virtual void setOffset( unsigned int offset ){

}

// Base-class default: always reports 0. CommonToken overrides this to
// return the stored offset.
virtual unsigned int getOffset() const{
   return 0;
}


In CommonToken.hpp class antlr::CommonToken

protected:
// Offset recorded for this token via setOffset(); the constructors set it to 0.
unsigned int offset_;

public:
// Stores `offset` as this token's offset.
virtual void setOffset( unsigned int offset ) {
	offset_ = offset;
}

// Returns the offset last stored by setOffset() (0 if never set).
virtual unsigned int getOffset() const {
	return offset_;
}



In CommonToken.cpp

// Default constructor: line 1, column 1, offset 0, empty text.
CommonToken::CommonToken() : Token(), line(1), col(1), offset_(0), text("")
{}

// Constructs a token with text `txt`, forwarding `t` to the Token base
// constructor (presumably the token type). Position defaults to line 1,
// column 1, offset 0.
CommonToken::CommonToken(int t, const ANTLR_USE_NAMESPACE(std)string& txt)
: Token(t)
, line(1)
, col(1)
,offset_(0)
, text(txt)
{}

// Constructs a token with text `s` and a default-constructed Token base.
// Position defaults to line 1, column 1, offset 0.
CommonToken::CommonToken(const ANTLR_USE_NAMESPACE(std)string& s)
: Token()
, line(1)
, col(1)
, offset_(0)
, text(s)
{}


Note that the offset_ member is initialized to 0.

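Then, in my custom AST class, I do something like the following. The token's
offset is treated as the position just past the token's text, so the node
subtracts the length of the text to get the offset at which the token starts: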


/**
 * AST node that remembers the source position of the token it was built
 * from: line, column, and the character offset of the token's first
 * character.
 *
 * The token's getOffset() is treated as the position just past the token's
 * text, so the start offset is getOffset() minus the length of the text.
 * NOTE(review): this relies on the lexer storing its offset in every token
 * it creates; that code is not part of this class -- confirm it is in place.
 */
class CppASTNode : public CommonAST {
public:

	/** Creates a node with no position information (all fields 0). */
	CppASTNode(): line_(0), column_(0), offset_(0) {}

	/** Creates a node from token `t`, copying its type, text and position. */
	CppASTNode( antlr::RefToken t ): line_(0), column_(0), offset_(0) {
		CommonAST::setType(t->getType() );
		CommonAST::setText(t->getText() );

		setPositionFrom(t);
	}

	/**
	 * Re-initializes the node from a bare type and text. No token is
	 * involved, so every position field is cleared -- including offset_,
	 * which would otherwise keep the value of a previous initialization.
	 */
	void initialize(int t, const std::string& txt) {
		CommonAST::setType(t);
		CommonAST::setText(txt);

		line_ = 0;
		column_ = 0;
		offset_ = 0;
	}

	/** Re-initializes the node as a copy of `t`, position included. */
	void initialize( RefCppASTNode t ) {
		CommonAST::setType(t->getType() );
		CommonAST::setText(t->getText() );

		line_ = t->line_;
		column_ = t->column_;
		offset_ = t->offset_;
	}

	/**
	 * Re-initializes the node from a generic AST node by delegating to
	 * CommonAST::initialize.
	 * NOTE(review): the position fields are left untouched here, so they
	 * keep whatever this node held before the call, even when `t` actually
	 * refers to a CppASTNode -- confirm that is intended.
	 */
	void initialize( RefAST t ) {
		CommonAST::initialize(t);
	}

	/** Re-initializes the node from token `t`, copying its position. */
	void initialize( antlr::RefToken t )  {
		CommonAST::initialize(t);

		setPositionFrom(t);
	}

	/** Forwards to CommonAST::setText. */
	void setText(const std::string& txt) {
		CommonAST::setText(txt);
	}

	/** Forwards to CommonAST::setType. */
	void setType(int type) {
		CommonAST::setType(type);
	}

	/** Factory function: returns a new, empty CppASTNode as a RefAST. */
	static antlr::RefAST factory( void ) {
		antlr::RefAST ret =
			static_cast<antlr::RefAST>(RefCppASTNode(new CppASTNode()));

		return ret;
	}

	/** Line recorded from the originating token (0 if none). */
	int getLineNumber() const {
		return line_;
	}

	/** Column recorded from the originating token (0 if none). */
	int getColumnNumber() const {
		return column_;
	}

	/** Offset of the first character of the originating token (0 if none). */
	unsigned int getOffset() const {
		return offset_;
	}
protected:
	int line_;
	int column_;
	unsigned int offset_;

private:
	/**
	 * Computes the offset at which a token starts, given the offset just
	 * past its text and the length of that text. The subtraction is done on
	 * unsigned values, so a text longer than the recorded offset (e.g. a
	 * token whose offset was never set and still reads 0) would wrap around
	 * to a huge number; such cases are clamped to 0 instead.
	 */
	static unsigned int startOffset( unsigned int endOffset,
	                                 std::string::size_type textLength ) {
		if ( textLength >= endOffset ) {
			return 0;
		}
		// textLength < endOffset here, so it fits in unsigned int.
		return endOffset - static_cast<unsigned int>(textLength);
	}

	/** Copies line, column and start offset from token `t`. */
	void setPositionFrom( antlr::RefToken t ) {
		line_ = t->getLine();
		column_ = t->getColumn();
		offset_ = startOffset( t->getOffset(), t->getText().size() );
	}
};


Cheers, and hope this proves useful to others.

Jim


More information about the antlr-interest mailing list