[antlr-interest] Escaping single quotes in a lexer
Emanuele Gesuato
egesuato at ibc.it
Wed Apr 1 00:57:26 PDT 2009
Gavin Lambert wrote:
> At 21:54 30/03/2009, Emanuele Gesuato wrote:
> >What if I want to keep the original STRING rule as unaltered as
> >possible?
> >
> >STRING : "'"((JOLLY)?(PAROLE|INTEGER)(JOLLY)?)"'";
> >
> >Is it possible to change it to something similar to
> >
> >STRING : "'"((JOLLY)?(PAROLE|INTEGER|ESCAPE)(JOLLY)?)"'";
> >
> >Where ESCAPE is a regexp to match the escaping of single quotes ?
>
> Yes, it's possible, but you shouldn't do that. It's best to match the
> entire string (defined as "anything within quotes") as a single lexical
> unit, and worry about validating the specific content of the text later
> on (eg. in the parser or calling code).
>
> Which is what I said before, and what the rule I specified does.
>
I just don't want to change the existing logic.
However, I tried your solution, but I got:
[antlr] ANTLR Parser Generator Version 2.7.6 (2005-12-22) 1989-2005
[antlr] ..util/parser/SQL.g:54:30: This subrule cannot be inverted.
Only subrules of the form:
[antlr] (T1|T2|T3...) or
[antlr] ('c1'|'c2'|'c3'...)
[antlr] may be inverted (ranges are also allowed).
[antlr] Exiting due to errors.
I also tried to write my own STRING rule, following your suggestion:
STRING : "'"((JOLLY)?(PAROLE|INTEGER|ESCAPE)(JOLLY)?)"'";
protected ESCAPE : ("\\" ("'")?)+;
but it gives me
[antlr] ANTLR Parser Generator Version 2.7.6 (2005-12-22) 1989-2005
[antlr] SQL.g:35:29: warning:lexical nondeterminism between alts 1 and 2 of block upon
[antlr] SQL.g:35:29: k==1:'\''
[antlr] SQL.g:35:29: k==2:'\'','*','\\'
[antlr] SQL.g:35:29: k==3:<end-of-token>,'\'','*','\\'
[antlr] SQL.g:35:29: k==4:<end-of-token>,'\'','*','\\'
Here is the full modified SQL.g:
***
// Grammar for the parser of SQL WHERE conditions
// Use ant-antlr.xml to generate the classes
// Support for dates and isnull is missing
// @author Fede
// Every generated class must carry the package declaration
header{
package it.ibc.jstore.util.parser;
}
// Lexer ********************************************
// Preamble action: imports Log (not referenced by any rule visible below).
{import it.ibc.jstore.base.Log;}
class RestrictionsLexer extends Lexer;
// Lexer lookahead depth: up to 4 characters.
options { k=4; }
// Whitespace is matched and then discarded (Token.SKIP). newline() is called
// for "\r\n" and "\n"; a lone '\r' is not covered by this rule.
WHITESPACE : (' '
| '\t'
| '\r' '\n' { newline(); }
| '\n' { newline(); }
) { $setType(Token.SKIP); }
;
// ---- Protected helper rules: building blocks invoked from the public rules
// ---- further down. NUMERO and CAMPO assign some of these names as token
// ---- types through $setType.
// SPAZIO = "space": a single blank.
protected SPAZIO : ' ';
// CIFRA = "digit".
protected CIFRA : '0'..'9';
// LETTERA = "letter": ASCII letters plus '_', '.' and '-'.
protected LETTERA : ('a'..'z'|'A'..'Z'|'_'|'.'|'-');
// PAROLA = "word": one LETTERA followed by any mix of CIFRA and LETTERA.
protected PAROLA : LETTERA(CIFRA|LETTERA)*;
// PAROLE = "words": one or more PAROLA separated by runs of spaces.
protected PAROLE : PAROLA((SPAZIO)+(PAROLA))*;
// INTEGER: one or more digits (no sign).
protected INTEGER : (CIFRA)+;
// LONG: an INTEGER immediately followed by 'L' or 'l'.
protected LONG : INTEGER('L'|'l');
// Keywords: only the all-upper, all-lower and capitalized spellings are listed;
// other mixed-case spellings (e.g. "lIKE") are not matched by these rules.
protected LIKE : ("LIKE"|"like"|"Like");
protected OR : ("OR"|"or"|"Or");
protected AND : ("AND"|"and"|"And");
protected IN : ("IN"|"in"|"In");
// ESCAPE: one or more backslashes, each optionally followed by a single quote.
// NOTE(review): inside STRING a quote that follows a backslash can be read either
// as this optional quote or as the closing quote of STRING; presumably this is
// what the "lexical nondeterminism" warning refers to -- confirm.
protected ESCAPE : ("\\" ("'")?)+;
// ---- Operators and punctuation.
// UGUALE = "equal", DIVERSO = "different", MAGGIORE = "greater", MINORE = "less",
// MAGGIOREUGUALE = "greater or equal", MINOREUGUALE = "less or equal".
UGUALE : "=";
DIVERSO : "<>";
MAGGIORE : '>';
MINORE : '<';
MAGGIOREUGUALE : ">=";
MINOREUGUALE : "<=";
// JOLLY = "wildcard".
JOLLY : "*";
LPAREN : '(';
RPAREN : ')';
// SEPARATORE = "separator", VIRGOLA = "comma".
SEPARATORE : ('/');
VIRGOLA : ",";
// NUMERO = "number": the syntactic predicate tries LONG first, otherwise INTEGER.
// Both alternatives retype the token, so NUMERO is never the emitted type.
NUMERO : (LONG) => LONG { $setType(LONG); }
| INTEGER { $setType(INTEGER); }
;
// STRING: a single-quoted literal containing exactly one of PAROLE, INTEGER or
// ESCAPE, with an optional JOLLY ('*') before and/or after it. The content is
// not optional, so an empty literal ('') does not match.
// NOTE(review): PAROLE/INTEGER/ESCAPE are alternatives, not a repetition, so text
// that mixes words with an escape (e.g. 'O\'Brien') does not appear to fit this
// rule -- confirm.
STRING : "'"((JOLLY)?(PAROLE|INTEGER|ESCAPE)(JOLLY)?)"'";
// CAMPO = "field": the keyword spellings are retyped to LIKE/OR/AND/IN; any other
// PAROLA is emitted as CAMPO.
// NOTE(review): the predicates only test a prefix of the input, so an identifier
// that starts with a keyword (e.g. "order", "index") looks like it would lex as
// the keyword followed by the remainder -- confirm, and consider ANTLR's
// literals/testLiterals mechanism instead.
CAMPO : (LIKE) => LIKE { $setType(LIKE); }
| (OR) => OR { $setType(OR); }
| (AND) => AND { $setType(AND); }
| (IN) => IN { $setType(IN); }
| PAROLA { $setType(CAMPO); }
;
// Parser *******************************************
class RestrictionsParser extends Parser;
// Build an AST while parsing. In the rules below a '^' suffix makes the token
// the root of the current subtree and a '!' suffix leaves it out of the tree.
options { buildAST=true; }
// valore = "value": a literal operand.
valore : STRING | LONG | INTEGER;
// A parenthesized OR-expression: LPAREN becomes the root, RPAREN is dropped.
expr : LPAREN^ orExpr RPAREN! ;
// Precedence, loosest to tightest: OR, AND, then the relational operators.
orExpr : andExpr ((OR^) andExpr)* ;
andExpr : relExpr ((AND^) relExpr)* ;
// An atom followed by zero or more "<operator> rparm" or "IN list" suffixes.
// Because the suffix group is starred, a bare atom is also accepted.
relExpr : atom
(((UGUALE^|DIVERSO^|MAGGIORE^|MINORE^|MINOREUGUALE^|MAGGIOREUGUALE^|LIKE^)
rparm) | (IN^ list))* ;
// Left-hand operand: a field name or a nested parenthesized expression.
atom : CAMPO | expr ;
// Right-hand operand: an atom or a literal value.
rparm : atom | valore ;
// Parenthesized, comma-separated values; the parentheses are dropped and each
// VIRGOLA becomes a subtree root.
list : LPAREN! valore (VIRGOLA^ valore)* RPAREN! ;
// Tree parser **************************************
// Preamble action: Java imports used by the actions of the tree-walker rules.
{
import it.ibc.jstore.data.Restrictions;
import it.ibc.jstore.data.MatchMode;
import java.util.List;
import java.util.ArrayList;
}
class RestrictionsTreeWalker extends TreeParser;
// Base element (a field, an integer, ...): turns one leaf node into a Java value.
//   CAMPO   -> the node text, as a String
//   INTEGER -> Integer parsed from the node text
//   LONG    -> Long parsed from the node text without its last character
//   STRING  -> the node text without its first and last characters
base returns [Object s]
{ s=null; }
    : campoNode:CAMPO
        { s=campoNode.getText(); }
    | intNode:INTEGER
        { s=Integer.valueOf(intNode.getText()); }
    | longNode:LONG
        {
            String longText=longNode.getText();
            s=Long.valueOf(longText.substring(0,longText.length()-1));
        }
    | stringNode:STRING
        {
            String quotedText=stringNode.getText();
            s=quotedText.substring(1,quotedText.length()-1);
        }
    ;
// Matches a single CAMPO (field) node and returns its text.
campo returns [String s]
{ s=null; }
    : fieldNode:CAMPO
        { s=fieldNode.getText(); }
    ;
// Matches a single STRING node and returns its text without the first and
// last characters (the surrounding quotes).
stringa returns [String s]
{ s=null; }
    : literalNode:STRING
        {
            String quotedText=literalNode.getText();
            s=quotedText.substring(1,quotedText.length()-1);
        }
    ;
// Flattens a value list into a java.util.List. A list node is either a VIRGOLA
// (comma) subtree whose two children are themselves lists, or a single base
// element. Elements are appended left to right.
lista returns [List l]
{ l=new ArrayList(); List t,v; Object a; }
: #(VIRGOLA v=lista t=lista) { l.addAll(v); l.addAll(t); } // A list is a comma-separated sequence of lists
| a=base { l.add(a); } // ...and this is the base element of the list
;
// Walks one expression subtree and returns the Restrictions built from it.
// Each alternative matches a subtree rooted at an operator token, converts its
// children, and calls the corresponding Restrictions method
// (eq, ne, le, ge, lt, gt, ilike, in, and, or).
// NOTE(review): r is initialised with a new Restrictions for every alternative,
// including LPAREN, which then replaces it with the nested result.
// NOTE(review): the comparison alternatives cast the left operand to String,
// which presumes it is always a CAMPO leaf -- confirm against the parser's
// atom/rparm rules, which also allow nested expressions there.
// NOTE(review): no alternative handles a bare CAMPO node, although the parser's
// relExpr accepts an atom with no operator -- confirm this cannot reach here.
expr returns [Restrictions r]
{ Object a,b; Restrictions t,v; r=new Restrictions(); }
: #(UGUALE a=base b=base) { r.eq((String)a,b); }
| #(DIVERSO a=base b=base) { r.ne((String)a,b); }
| #(MINOREUGUALE a=base b=base) { r.le((String)a,b); }
| #(MAGGIOREUGUALE a=base b=base) { r.ge((String)a,b); }
| #(MINORE a=base b=base) { r.lt((String)a,b); }
| #(MAGGIORE a=base b=base) { r.gt((String)a,b); }
// LIKE is evaluated as a case-insensitive match via ilike with MatchMode.GUESS.
// NOTE(review): the "case-insensitive" reading is inferred from the method name -- confirm.
| #(LIKE a=campo b=stringa) { r.ilike((String)a,(String)b,
MatchMode.GUESS); }
| #(IN a=campo b=lista) { r.in((String)a,(List)b); }
| #(AND t=expr v=expr) {r.and(t,v);}
| #(OR t=expr v=expr) {r.or(t,v);}
// Parenthesized sub-expression: return the nested result unchanged.
| #(LPAREN t=expr) { r=t; }
;
Thanks for any help !
Emanuele
More information about the antlr-interest
mailing list