[antlr-interest] Escaping quotes in a lexer

Emanuele Gesuato emanuele.gesuato at gmail.com
Tue Mar 17 15:05:13 PDT 2009


Hi there,

I'm quite new to the ANTLR world, so my question may be obvious. I'm
using ANTLR 2.7.6 with Java 5 to generate a lexer class. In this
lexer (written by an ex-colleague) I'm trying to recognize strings like

Invoice.customer='Tom'

to build a Hibernate restriction.

I would like to use the ' character inside the string, something like:

Invoice.customer='Tom L\'oreal'
or (better)
Invoice.customer="Tom L'oreal"

I've got the String definition for such fields that is the following:
STRING : "'"((JOLLY)?(PAROLE|INTEGER)(JOLLY)?)"'";
where:
protected CIFRA        : '0'..'9';
protected LETTERA    : ('a'..'z'|'A'..'Z'|'_'|'\\'|'.'|'-');
protected PAROLA    : LETTERA(CIFRA|LETTERA)*;
protected PAROLE    : PAROLA((SPAZIO)+(PAROLA))*;
protected INTEGER    : (CIFRA)+;

and I've tried to use:
STRING : (" ' "((JOLLY)?(PAROLE|INTEGER)(JOLLY)?)" ' ") | (' "
'((JOLLY)?(PAROLE|INTEGER)(JOLLY)?)' " ');

(spaces added for clarity) but it does not recognize the string
"Tom L'oreal". The generated Java class is no different from the previous one.



Here is the original full grammar:
*************************
header{
package it.ibc.jstore.util.parser;
}

// Lexer ********************************************
{import it.ibc.jstore.base.Log;}
// Lexer for the restriction-expression language.
class RestrictionsLexer extends Lexer;

// k=4: the lexer uses up to four characters of lookahead.
options { k=4; }

// Blanks, tabs and line breaks. Each line break ("\r\n" or a lone "\n") calls
// newline(); the token type is then set to Token.SKIP so that no whitespace
// token is handed on. A lone '\r' (not followed by '\n') is not covered by any
// alternative of this rule.
WHITESPACE    : (' '
            | '\t'
            | '\r' '\n' { newline(); }
            | '\n'      { newline(); }
            ) { $setType(Token.SKIP); }
            ;

// Helper rules. Being 'protected', they are only invoked from other lexer
// rules and are never produced on their own as top-level tokens.

// A single blank.
protected SPAZIO    : ' ';
// A single decimal digit.
protected CIFRA        : '0'..'9';
// A "letter": ASCII letter, underscore, backslash, dot or hyphen.
protected LETTERA    : ('a'..'z'|'A'..'Z'|'_'|'\\'|'.'|'-');
// A word: one LETTERA followed by any number of digits or letters.
protected PAROLA    : LETTERA(CIFRA|LETTERA)*;
// One or more words, separated by one or more blanks.
protected PAROLE    : PAROLA((SPAZIO)+(PAROLA))*;
// One or more digits.
protected INTEGER    : (CIFRA)+;
// An INTEGER immediately followed by an 'L' or 'l' suffix.
protected LONG        : INTEGER('L'|'l');
// Keywords. Only three spellings of each are matched: all upper case,
// all lower case, and capitalised (e.g. "lIKE" is not matched).
protected LIKE        : ("LIKE"|"like"|"Like");
protected OR        : ("OR"|"or"|"Or");
protected AND        : ("AND"|"and"|"And");
protected IN        : ("IN"|"in"|"In");




// Operator and punctuation tokens.
// Comparison operators: equal, not-equal, greater, less, greater-or-equal,
// less-or-equal.
UGUALE            : "=";
DIVERSO            : "<>";
MAGGIORE        : '>';
MINORE            : '<';
MAGGIOREUGUALE    : ">=";
MINOREUGUALE    : "<=";
// '*' — also referenced by the STRING rule as an optional first/last character
// of a string body.
JOLLY            : "*";
LPAREN            : '(';
RPAREN            : ')';
// '/' — NOTE(review): not referenced by any parser rule visible in this file.
SEPARATORE        : ('/');
// ',' — separates the values of an IN list.
VIRGOLA            : ",";
// Numeric literal. The syntactic predicate first tries to match a LONG
// (digits plus 'L'/'l' suffix); if that fails the input is matched as a plain
// INTEGER. The token is re-typed accordingly, so NUMERO itself is never
// emitted as a token type.
NUMERO : (LONG) => LONG { $setType(LONG); }
     | INTEGER { $setType(INTEGER); }
     ;
// Single-quoted string literal: an optional leading JOLLY ('*'), then either
// PAROLE (blank-separated words) or an INTEGER, then an optional trailing
// JOLLY. Neither PAROLE nor INTEGER can match a quote character, so the body
// cannot contain one.
// The terminating ';' was missing, which made this rule run into the
// following one.
STRING : "'"((JOLLY)?(PAROLE|INTEGER)(JOLLY)?)"'";
// Identifier-like word: either a field name (CAMPO) or one of the keywords
// LIKE / OR / AND / IN.
//
// The whole word is matched first and only then compared with the keyword
// spellings. A syntactic predicate such as "(IN) => IN" succeeds as soon as the
// input merely *starts* with the keyword, so a field name like "Invoice" would
// be split into the keyword IN followed by "voice", and "order" into OR
// followed by "der". Classifying the complete word avoids that.
//
// The accepted keyword spellings are unchanged: all upper case, all lower
// case, and capitalised. Every other word is a CAMPO.
CAMPO : PAROLA
        {
            String word = $getText;
            if (word.equals("LIKE") || word.equals("like") || word.equals("Like")) {
                $setType(LIKE);
            } else if (word.equals("OR") || word.equals("or") || word.equals("Or")) {
                $setType(OR);
            } else if (word.equals("AND") || word.equals("and") || word.equals("And")) {
                $setType(AND);
            } else if (word.equals("IN") || word.equals("in") || word.equals("In")) {
                $setType(IN);
            } else {
                $setType(CAMPO);
            }
        }
    ;

// Parser *******************************************
// Parser for restriction expressions. With buildAST=true an AST is built while
// parsing: a token marked '^' becomes the root of the current subtree, a token
// marked '!' is left out of the tree.
class RestrictionsParser extends Parser;
options {
    buildAST = true;
}

// A literal value: quoted string, long or integer.
valore
    : STRING
    | LONG
    | INTEGER
    ;

// A parenthesised expression; LPAREN is the subtree root, RPAREN is dropped.
expr
    : LPAREN^ orExpr RPAREN!
    ;

// andExpr operands joined by OR.
orExpr
    : andExpr ( OR^ andExpr )*
    ;

// relExpr operands joined by AND.
andExpr
    : relExpr ( AND^ relExpr )*
    ;

// An atom followed by any number of comparisons (operator + right-hand
// parameter) or IN-list tests.
relExpr
    : atom
      ( ( UGUALE^
        | DIVERSO^
        | MAGGIORE^
        | MINORE^
        | MINOREUGUALE^
        | MAGGIOREUGUALE^
        | LIKE^
        )
        rparm
      | IN^ list
      )*
    ;

// A field name or a parenthesised expression.
atom
    : CAMPO
    | expr
    ;

// Right-hand side of a comparison.
rparm
    : atom
    | valore
    ;

// Parenthesised, comma-separated values; each comma becomes a subtree root,
// the parentheses are dropped.
list
    : LPAREN! valore ( VIRGOLA^ valore )* RPAREN!
    ;


// Tree parser *******************************
{
import it.ibc.jstore.data.Restrictions;
import it.ibc.jstore.data.MatchMode;
import java.util.List;
import java.util.ArrayList;
}
// Tree parser that walks an AST (presumably the one built by
// RestrictionsParser) and turns it into Restrictions objects.
class RestrictionsTreeWalker extends TreeParser;

// Base element (a field, an integer, ...): converts one leaf node into a Java
// value.
//   CAMPO   -> its text, as a String
//   INTEGER -> an Integer
//   LONG    -> a Long, parsed from the text without its last character
//   STRING  -> the text without its first and last character
base returns [Object s]
    { s = null; }
    : fieldTok:CAMPO
        { s = fieldTok.getText(); }
    | intTok:INTEGER
        { s = Integer.valueOf(intTok.getText()); }
    | longTok:LONG
        {
            String longText = longTok.getText();
            s = Long.valueOf(longText.substring(0, longText.length() - 1));
        }
    | strTok:STRING
        {
            String quoted = strTok.getText();
            s = quoted.substring(1, quoted.length() - 1);
        }
    ;

// Matches a single CAMPO node and returns its text.
campo returns [String s]
    { s = null; }
    : fieldTok:CAMPO
        { s = fieldTok.getText(); }
    ;

// Matches a single STRING node and returns its text without the first and
// last character (the surrounding quotes).
stringa returns [String s]
    { s = null; }
    : strTok:STRING
        {
            String quoted = strTok.getText();
            s = quoted.substring(1, quoted.length() - 1);
        }
    ;

// Flattens a value list into a java.util.List, keeping left-to-right order.
// The trailing comment on the first alternative used to be wrapped onto a
// second line, which left plain text in the middle of the rule; the comments
// now sit on lines of their own.
lista returns [List l]
    { l=new ArrayList(); List t,v; Object a; }
    // A list is a sequence of lists separated by commas: collect the left
    // sub-list first, then the right one.
    : #(VIRGOLA v=lista t=lista) { l.addAll(v); l.addAll(t); }
    // And this is the base element of the list.
    | a=base { l.add(a); }
    ;

// Translates one expression subtree into a Restrictions object.
// Comparison nodes take two 'base' children, LIKE takes a field and a string,
// IN takes a field and a value list, AND/OR combine two sub-expressions, and
// an LPAREN node simply yields the result of the expression it wraps.
expr returns [Restrictions r]
  {
    Object lhs, rhs;
    Restrictions left, right;
    r = new Restrictions();
  }
  : #( UGUALE         lhs=base  rhs=base )
        { r.eq((String) lhs, rhs); }
  | #( DIVERSO        lhs=base  rhs=base )
        { r.ne((String) lhs, rhs); }
  | #( MINOREUGUALE   lhs=base  rhs=base )
        { r.le((String) lhs, rhs); }
  | #( MAGGIOREUGUALE lhs=base  rhs=base )
        { r.ge((String) lhs, rhs); }
  | #( MINORE         lhs=base  rhs=base )
        { r.lt((String) lhs, rhs); }
  | #( MAGGIORE       lhs=base  rhs=base )
        { r.gt((String) lhs, rhs); }
  | #( LIKE           lhs=campo rhs=stringa )
        { r.ilike((String) lhs, (String) rhs, MatchMode.GUESS); }
  | #( IN             lhs=campo rhs=lista )
        { r.in((String) lhs, (List) rhs); }
  | #( AND            left=expr right=expr )
        { r.and(left, right); }
  | #( OR             left=expr right=expr )
        { r.or(left, right); }
  | #( LPAREN         left=expr )
        { r = left; }
  ;


More information about the antlr-interest mailing list