[antlr-interest] Building syntax highlighters with ANTLR

Gerald Rosenberg gerald at certiv.net
Wed Apr 15 21:28:56 PDT 2009


A better approach is to use a predicate.  That way you don't have to 
intercept every lexer token, and it becomes considerably easier to 
handle multiple pair sets.

@lexer::members {
   // Predicate helpers: delegate to the attached PairMatchHelper class,
   // passing the lexer's current character stream ('input').

   // Pair match using the helper's default delimiters.
   public boolean pairMatch(int limit) {
     return PairMatchHelper.pairMatch(input, limit);
   }
   // Pair match using explicit single-character delimiters.
   public boolean pairMatch(int limit, char beg, char end) {
     return PairMatchHelper.pairMatch(input, limit, beg, end);
   }
}

// Matches '{' only if the predicate pairMatch(200) succeeds.
BRACE_BLOCK :'{' { pairMatch(200) }? ;
// Matches '[' only if the predicate pairMatch(50, '[', ']') succeeds.
BRACKET_BLOCK :'[' { pairMatch(50, '[', ']') }? ;

PairMatchHelper#pairMatch then does full nested pair matching, 
subject to certain limitations.  It does respect ANTLR's backtracking semantics.

Note: the attached version is set up just for single-char delimiters.




At 10:56 AM 4/15/2009, Sam Harwell wrote:

>The new method uses a very different override of NextToken(). The 
>outer loop is largely a duplication of the functionality of 
>Lexer.NextToken(). I've highlighted the key section that reliably 
>manages the lexer state information (yay HTML email).
>
>public override IToken NextToken()
>{
>     for ( ; ; )
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.antlr.org/pipermail/antlr-interest/attachments/20090415/f01b8bfd/attachment.html 
-------------- next part --------------
package net.certiv.test;

import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CharStream;

/**
 * Scans a character stream for the delimiter that closes an already-consumed
 * opening delimiter, honouring nesting, backslash escapes, {@code //} line
 * comments and single- or double-quoted strings.
 *
 * <p>Only single-character delimiters are supported. The methods consume from
 * the stream as they scan and never rewind it themselves.
 */
public class PairMatchHelper {

	/** Value returned by {@code LA(n)} once the end of the input is reached. */
	private static final int EOF = -1;

	/** When set, every consumed character is echoed to standard output. */
	private static boolean debug = false;

	/**
	 * Matches a <code>{ ... }</code> pair; see
	 * {@link #pairMatch(CharStream, int, char, char)}.
	 *
	 * @param input stream positioned just after the opening <code>{</code>
	 * @param limit maximum number of characters that may be consumed
	 * @return {@code true} if the matching <code>}</code> was found within {@code limit}
	 */
	public static boolean pairMatch(CharStream input, int limit) {
		return pairMatch(input, limit, '{', '}');
	}

	/**
	 * Consumes input up to and including the {@code close} character that
	 * balances an opening delimiter the caller has already consumed.
	 *
	 * <p>While scanning, a backslash skips the character after it, {@code //}
	 * skips to (not including) the next {@code '\r'} or {@code '\n'}, and a
	 * quoted string is skipped as a unit so delimiters inside it are ignored.
	 *
	 * @param input stream positioned just after the opening delimiter
	 * @param limit maximum number of characters that may be consumed; values
	 *              of zero or less always fail
	 * @param open  opening delimiter, increases the nesting depth
	 * @param close closing delimiter, decreases the nesting depth
	 * @return {@code true} only if the balancing {@code close} was consumed
	 *         within {@code limit} characters; {@code false} on end of input,
	 *         an unterminated string, or an exhausted limit
	 */
	public static boolean pairMatch(CharStream input, int limit, char open, char close) {
		// Java passes ints by value, so the budget has to be tracked here
		// rather than decremented inside a helper.
		int remaining = limit;
		int nest = 1; // already matched & consumed open char
		while (remaining > 0) {
			int la1 = input.LA(1);
			if (la1 == EOF) return false;
			if (la1 == '\\') {
				// an escape is two characters: the backslash and its target
				if (input.LA(2) == EOF || remaining < 2) return false;
				consume(input);
				consume(input);
				remaining -= 2;
			} else if (la1 == '/') {
				int la2 = input.LA(2);
				if (la2 == EOF) return false;
				consume(input);
				remaining--;
				if (la2 == '/') { // consume '//' up to, but not including, eol
					do {
						if (remaining == 0) return false;
						consume(input);
						remaining--;
						la1 = input.LA(1);
						if (la1 == EOF) return false;
					} while (la1 != '\r' && la1 != '\n');
				}
			} else if (la1 == '\'' || la1 == '"') {
				remaining = matchString(input, remaining, (char) la1);
				if (remaining < 0) return false;
			} else if (la1 == open) {
				nest++;
				consume(input);
				remaining--;
			} else if (la1 == close) {
				nest--;
				consume(input);
				remaining--;
				if (nest == 0) return true;
			} else {
				consume(input);
				remaining--;
			}
		}
		// budget exhausted before the balancing close delimiter was seen
		return false;
	}

	/**
	 * Consumes a quoted string whose opening quote is the next character.
	 *
	 * @param input stream positioned on the opening quote
	 * @param limit characters still available to consume; must be positive
	 * @param c     the quote character that terminates the string
	 * @return the budget left after the closing quote was consumed, or
	 *         {@code -1} if the input or the budget ran out first
	 */
	private static int matchString(CharStream input, int limit, char c) {
		int remaining = limit;
		consume(input); // the opening quote
		remaining--;
		while (remaining > 0) {
			int la1 = input.LA(1);
			if (la1 == EOF) return -1;
			if (la1 == '\\') {
				if (input.LA(2) == EOF || remaining < 2) return -1;
				consume(input);
				consume(input);
				remaining -= 2;
			} else {
				consume(input);
				remaining--;
				if (la1 == c) return remaining;
			}
		}
		return -1;
	}

	/** Consumes one character, echoing it first when {@link #debug} is set. */
	private static void consume(CharStream input) {
		if (debug) System.out.print((char) input.LA(1));
		input.consume();
	}

	// //////////////////////////////////////////////////////////////////////////

	/** Ad-hoc driver: runs the default brace match over {@link #t2} with debug echo on. */
	public static void main(String[] args) {
		debug = true;
		ANTLRStringStream input = new ANTLRStringStream(t2);
		boolean result = pairMatch(input, 1000);
		System.out.println("Result: " + result);
	}

	// Sample inputs; each represents the text following an already-consumed '{'.
	public static final String t1 = "hel'lo}and";
	public static final String t2 = "h{ell}o}and";
	public static final String t3 = "run(\"A{}\"); }";
}


More information about the antlr-interest mailing list