[antlr-interest] Escaping single quotes in a lexer

Emanuele Gesuato egesuato at ibc.it
Wed Apr 1 00:57:26 PDT 2009


Gavin Lambert wrote:
> At 21:54 30/03/2009, Emanuele Gesuato wrote:
>  >What if I want to keep the original STRING as unaltered as
>  >possible?
>  >
>  >STRING : "'"((JOLLY)?(PAROLE|INTEGER)(JOLLY)?)"'";
>  >
>  >Is it possible to change it to something similar to
>  >
>  >STRING : "'"((JOLLY)?(PAROLE|INTEGER|ESCAPE)(JOLLY)?)"'";
>  >
>  >Where ESCAPE is a regexp to match the escaping of single quotes ?
> 
> Yes, it's possible, but you shouldn't do that.  It's best to match the 
> entire string (defined as "anything within quotes") as a single lexical 
> unit, and worry about validating the specific content of the text later 
> on (eg. in the parser or calling code).
> 
> Which is what I said before, and what the rule I specified does.
> 

I just don't want to change the existing logic.

However, I tried your solution, but I got:
     [antlr] ANTLR Parser Generator   Version 2.7.6 (2005-12-22)   1989-2005
     [antlr] ..util/parser/SQL.g:54:30: This subrule cannot be inverted. 
  Only subrules of the form:
     [antlr]     (T1|T2|T3...) or
     [antlr]     ('c1'|'c2'|'c3'...)
     [antlr] may be inverted (ranges are also allowed).
     [antlr] Exiting due to errors.


I also tried to write my own STRING rule, based on your suggestion, using:
STRING : "'"((JOLLY)?(PAROLE|INTEGER|ESCAPE)(JOLLY)?)"'";
protected ESCAPE    : ("\\" ("'")?)+;


but it gives me:
     [antlr] ANTLR Parser Generator   Version 2.7.6 (2005-12-22)   1989-2005
     [antlr] SQL.g:35:29: warning:lexical nondeterminism between alts 1 
and 2 of block upon
     [antlr] SQL.g:35:29:     k==1:'\''
     [antlr] SQL.g:35:29:     k==2:'\'','*','\\'
     [antlr] SQL.g:35:29:     k==3:<end-of-token>,'\'','*','\\'
     [antlr] SQL.g:35:29:     k==4:<end-of-token>,'\'','*','\\'



Here is the full modified SQL.g:
***
// Grammar for the parser of SQL "where" conditions
// Use ant-antlr.xml to generate the classes
// Support for dates and isnull is missing
// @author Fede

// Every generated class must carry the package declaration
header{
package it.ibc.jstore.util.parser;
}

// Lexer ********************************************
// Tokenizer for the "where"-condition language: comparison operators,
// parentheses, commas, numbers, single-quoted strings, keywords and field
// names. Rule names are Italian; English glosses are given in the comments.
{import it.ibc.jstore.base.Log;}
class RestrictionsLexer extends Lexer;

// Use up to four characters of lookahead when choosing between alternatives.
options { k=4; }

// Blanks, tabs and line ends are skipped (never handed to the parser);
// newline() is called for "\r\n" and "\n" so line counting stays correct.
WHITESPACE	: (' '
			| '\t'
			| '\r' '\n' { newline(); }
			| '\n'      { newline(); }
			) { $setType(Token.SKIP); }
			;

// Protected rules are helpers: they are only matched when referenced from
// another lexer rule and are not produced as tokens on their own.
// SPAZIO = space, CIFRA = digit, LETTERA = letter (also '_', '.' and '-'),
// PAROLA = word (a letter followed by letters/digits),
// PAROLE = words (one or more words separated by runs of spaces).
protected SPAZIO	: ' ';
protected CIFRA		: '0'..'9';
protected LETTERA	: ('a'..'z'|'A'..'Z'|'_'|'.'|'-');
protected PAROLA	: LETTERA(CIFRA|LETTERA)*;
protected PAROLE	: PAROLA((SPAZIO)+(PAROLA))*;
// INTEGER is a run of digits; LONG is an INTEGER with an 'L' or 'l' suffix.
protected INTEGER	: (CIFRA)+;
protected LONG		: INTEGER('L'|'l');
// Keywords, accepted only in all-upper, all-lower or capitalized spelling.
protected LIKE		: ("LIKE"|"like"|"Like");
protected OR		: ("OR"|"or"|"Or");
protected AND		: ("AND"|"and"|"And");
protected IN		: ("IN"|"in"|"In");
// One or more backslashes, each optionally followed by a single quote.
// NOTE(review): the lexical-nondeterminism warning ANTLR 2.7.6 prints for
// this grammar (SQL.g:35:29) appears to point at the optional quote subrule
// below; presumably a quote after the backslash can either be consumed here
// or be taken as the closing quote of STRING -- confirm.
protected ESCAPE    : ("\\" ("'")?)+;



// Comparison operators: UGUALE = equal, DIVERSO = not equal,
// MAGGIORE = greater, MINORE = less, MAGGIOREUGUALE = greater-or-equal,
// MINOREUGUALE = less-or-equal.
UGUALE			: "=";
DIVERSO			: "<>";
MAGGIORE		: '>';
MINORE			: '<';
MAGGIOREUGUALE	: ">=";
MINOREUGUALE	: "<=";
// JOLLY = wildcard character, SEPARATORE = separator, VIRGOLA = comma.
JOLLY			: "*";
LPAREN			: '(';
RPAREN			: ')';
SEPARATORE		: ('/');
VIRGOLA			: ",";
// NUMERO = number. The syntactic predicate first tries to match a LONG;
// otherwise the text is an INTEGER. The token type is rewritten accordingly,
// so the parser sees LONG or INTEGER rather than NUMERO.
NUMERO : (LONG) => LONG { $setType(LONG); }
	 | INTEGER { $setType(INTEGER); }
	 ;

// Single-quoted string: an optional leading '*', then exactly one of
// PAROLE, INTEGER or ESCAPE, then an optional trailing '*'. The content is
// not optional, so an empty string ('') does not match this rule.
STRING : "'"((JOLLY)?(PAROLE|INTEGER|ESCAPE)(JOLLY)?)"'";
// CAMPO = field name. Keywords are tried first through syntactic predicates
// and re-typed as LIKE/OR/AND/IN; anything else that is a PAROLA is a CAMPO.
// NOTE(review): the predicates only test the leading characters, so a field
// whose name merely starts with a keyword (e.g. "order") presumably lexes as
// the keyword followed by the rest -- confirm against real inputs.
CAMPO : (LIKE) => LIKE { $setType(LIKE); }
	| (OR) => OR { $setType(OR); }
	| (AND) => AND { $setType(AND); }
	| (IN) => IN { $setType(IN); }
	| PAROLA { $setType(CAMPO); }
	;

// Parser *******************************************
// Builds an AST for a condition expression. In the rules below a '^' suffix
// makes the token the root of the current subtree and a '!' suffix leaves
// the token out of the tree.
class RestrictionsParser extends Parser;
options { buildAST=true; }

// valore = value: a literal string or number.
valore	 : STRING | LONG | INTEGER;
// Parenthesized expression; LPAREN becomes the subtree root, RPAREN is dropped.
expr     : LPAREN^ orExpr RPAREN! ;
// OR is matched at the outermost level, AND inside it, and the relational
// operators innermost.
orExpr   : andExpr ((OR^) andExpr)* ;
andExpr  : relExpr ((AND^) relExpr)* ;
// An atom followed by any number of "operator operand" or "IN list" parts.
relExpr  : atom 
(((UGUALE^|DIVERSO^|MAGGIORE^|MINORE^|MINOREUGUALE^|MAGGIOREUGUALE^|LIKE^) 
rparm) | (IN^ list))* ;
// An atom is a field name or a parenthesized expression.
atom     : CAMPO | expr ;
// Right-hand operand of a relational operator.
rparm    : atom | valore ;
// Parenthesized, comma-separated values; the parentheses are dropped and
// each VIRGOLA (comma) becomes a subtree root.
list	 : LPAREN! valore (VIRGOLA^ valore)* RPAREN! ;


// Tree parser **************************************
// Walks the AST produced by RestrictionsParser and turns it into a
// Restrictions object by calling the matching Restrictions method for each
// operator node.
{
import it.ibc.jstore.data.Restrictions;
import it.ibc.jstore.data.MatchMode;
import java.util.List;
import java.util.ArrayList;
}
class RestrictionsTreeWalker extends TreeParser;

// Base element (a field, an integer..)
// Returns the Java value of a leaf node:
//   CAMPO   -> the token text
//   INTEGER -> Integer.valueOf of the token text
//   LONG    -> Long.valueOf of the text without its last character (the suffix)
//   STRING  -> the text without its first and last character (the quotes)
base returns [Object s]
	{ s=null; }
	: i:CAMPO { s=i.getText(); }
	| j:INTEGER { s=Integer.valueOf(j.getText()); }
	| k:LONG { int lunghezza=k.getText().length(); s=Long.valueOf(k.getText().substring(0,lunghezza-1)); }
	| l:STRING { int lunghezza=l.getText().length(); s=l.getText().substring(1,lunghezza-1); }
	;

// Returns the text of a CAMPO (field name) node.
campo returns [String s]
	{ s=null; }
	: i:CAMPO { s=i.getText(); }
	;

// Returns the text of a STRING node with its first and last character
// (the surrounding quotes) removed.
stringa returns [String s]
	{ s=null; }
	: l:STRING { int lunghezza=l.getText().length(); s=l.getText().substring(1,lunghezza-1); }
	;
	
// Flattens the comma-rooted subtree of an IN list into one java.util.List
// of base values, keeping left-to-right order.
lista returns [List l]
	{ l=new ArrayList(); List t,v; Object a; }
	// A list is a sequence of lists separated by commas
	// (this comment was split across two lines by mail wrapping, which left
	// its second half outside the comment; it is kept whole here)
	: #(VIRGOLA v=lista t=lista) { l.addAll(v); l.addAll(t); }
	| a=base { l.add(a); } // And this is the base element of the list
	;

// Builds the Restrictions for one operator node. Each alternative matches a
// subtree rooted at an operator token and calls the corresponding method on a
// fresh Restrictions instance; a parenthesized subtree simply returns the
// Restrictions of its inner expression.
expr returns [Restrictions r]
   { Object a,b; Restrictions t,v; r=new Restrictions(); }
   : #(UGUALE a=base b=base) { r.eq((String)a,b); }
   | #(DIVERSO a=base b=base) { r.ne((String)a,b); }
   | #(MINOREUGUALE a=base b=base) { r.le((String)a,b); }
   | #(MAGGIOREUGUALE a=base b=base) { r.ge((String)a,b); }
   | #(MINORE a=base b=base) { r.lt((String)a,b); }
   | #(MAGGIORE a=base b=base) { r.gt((String)a,b); }
   | #(LIKE a=campo b=stringa) { r.ilike((String)a,(String)b, MatchMode.GUESS); }
   | #(IN a=campo b=lista) { r.in((String)a,(List)b); }
   | #(AND t=expr v=expr) {r.and(t,v);}
   | #(OR t=expr v=expr) {r.or(t,v);}
   | #(LPAREN t=expr) { r=t; }
   ;



Thanks for any help !
Emanuele



More information about the antlr-interest mailing list