[antlr-interest] nondeterminism warning

Matthew Ford Matthew.Ford at forward.com.au
Sat Jan 18 01:17:18 PST 2003


try the following
// test.in

"test c" and b or c not "d"

C:\temp\antlrTest>java -cp .;c:\antlr\antlr.jar Main <test.in
["test\ c",<7>,line=1,col=1]
["and",<7>,line=1,col=10]
["b",<7>,line=1,col=14]
["or",<7>,line=1,col=16]
["c",<7>,line=1,col=19]
["not",<7>,line=1,col=21]
["d",<7>,line=1,col=25]


// Main.java
import java.io.*;
import antlr.Token;

/**
 * Console driver for the query lexer: tokenizes whatever arrives on standard
 * input and echoes every token, one per line, to standard output.
 *
 * The {@code EOF} constant used as the stop condition is not declared in this
 * class; presumably it is supplied by {@code QueryLexerTokenTypes}.
 */
class Main implements QueryLexerTokenTypes {

    /**
     * Pulls tokens from a {@code QueryLexer} reading {@code System.in} and
     * prints each one until a token of type {@code EOF} is returned.
     *
     * Any exception is reported on standard error and then rethrown wrapped
     * in a {@link RuntimeException} (the original is kept as the cause).
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        try {
            DataInputStream stdin = new DataInputStream(System.in);
            QueryLexer lexer = new QueryLexer(stdin);
            // The EOF token itself is never printed: the check runs before the body.
            for (Token tok = lexer.nextToken(); tok.getType() != EOF; tok = lexer.nextToken()) {
                System.out.println(tok);
            }
        } catch (Exception failure) {
            System.err.println("exception: " + failure);
            throw new RuntimeException(failure);
        }
    }
}



//QueryLexer.g
// Lexer for search queries: quoted phrases, bare words, and whitespace
// (skipped outside phrases). ANTLR 2 grammar syntax.
class QueryLexer extends Lexer;

options
{
// Character set the lexer accepts, given as octal escapes: '\3' through
// '\377' (decimal 3..255).
// NOTE(review): the LETTER rule lists ranges above \u00ff, which lie outside
// this vocabulary - confirm whether the vocabulary should be widened.
charVocabulary = '\3'..'\377';
// Lookahead depth of three characters.
k=3;
}

// Declares the boolean operators as named token types bound to upper-case
// string literals, instead of as separate lexer rules.
// NOTE(review): the sample run in this post feeds lower-case and/or/not and
// every token prints with the same type <7>; presumably the literal match is
// case-sensitive and depends on literal testing being enabled - confirm.
tokens {
  AND = "AND";
  OR = "OR";
  NOT = "NOT";
}


// The only rule in this grammar not marked 'protected'; the other rules are
// helpers referenced from here (directly or through PHRASE / WORD).
// A TERM is one of:
//   - a quoted PHRASE,
//   - a bare WORD,
//   - a run of one or more WS characters, whose token type is set to
//     Token.SKIP so it is not handed to the caller as a real token.
TERM
: PHRASE
| WORD
| (WS { $setType(Token.SKIP); })+
;


// A double-quoted phrase: opening quote, a WORD, then any number of
// (WS, optional WORD) groups, then the closing quote.
// The '!' suffix drops the matched text from the token text, so both quotes
// and each raw WS character are discarded; in place of every WS character
// the action appends a backslash followed by a space. The sample run in this
// post shows the phrase "test c" coming out with text  test\ c .
// Because WORD after WS is optional, consecutive or trailing whitespace is
// accepted and each WS character yields its own escaped space.
protected PHRASE
: '"'! WORD ( WS! {$append("\\ ");} (WORD)? )*
'"'!
;


// Helper rule: one or more consecutive LETTER characters.
protected WORD
: (LETTER)+
;

// Helper rule: exactly one whitespace character, either a space or a tab.
// There is no repetition here; TERM and PHRASE apply their own loops
// around WS.
protected WS
: (' ' | '\t')
;

// Helper rule: a single character allowed inside a WORD.
// Accepts '$' (\u0024), 'A'..'Z', '_' (\u005f), 'a'..'z', and
// \u00c0..\u00ff except \u00d7 and \u00f7, plus the listed ranges above
// \u00ff. ASCII digits ('0'..'9') are not included.
// NOTE(review): the ranges from \u0100 upward exceed the charVocabulary
// declared in the options section ('\3'..'\377') - confirm they are usable.
protected LETTER
    :   '\u0024' |
        '\u0041'..'\u005a' |
        '\u005f' |
        '\u0061'..'\u007a' |
        '\u00c0'..'\u00d6' |
        '\u00d8'..'\u00f6' |
        '\u00f8'..'\u00ff' |
        '\u0100'..'\u1fff' |
        '\u3040'..'\u318f' |
        '\u3300'..'\u337f' |
        '\u3400'..'\u3d2d' |
        '\u4e00'..'\u9fff' |
        '\uf900'..'\ufaff'
    ;


----- Original Message -----
From: <chantal.ackermann at web.de>
To: <antlr-interest at yahoogroups.com>
Sent: Thursday, January 16, 2003 9:21 PM
Subject: [antlr-interest] nondeterminism warning


> hello all,
>
> I am very new to parser generators and ANTLR. I am trying to get my
> first Lexer compiled.
>
> The Lexer should recognize expressions for a search query:
>
> - Phrases: in double quotes (like "one phrase"), any white space shall
> be escaped (like "one\\ phrase").
> - boolean operators: "AND", "OR", "NOT"
> - single words (not "AND", "OR", "NOT" *sigh*)
>
> outside a phrase white space shall be ignored.
>
> this is my current Lexer which I am not able to improve further to get
> rid of the warnings:
>
> /******************** LEXER **************************/
>
> class QueryLexer extends Lexer;
>
> options
> {
> charVocabulary = '\3'..'\377';
> k=3;
> }
>
> {
> private boolean isPhrase = false;
> }
>
> TERM
> : PHRASE
> | ( AND ) => { $setType(Token.AND); }
> | WORD
> | WS { $setType(Token.SKIP); }
> | { System.out.println("error: " + $getText()); }
> ;
>
> AND
> : { this.isPhrase == false }? "AND"
> ;
>
> OR
> : { this.isPhrase == false }? "OR"
> ;
>
> NOT
> : { this.isPhrase == false }? "NOT"
> ;
>
> protected PHRASE
> : '"'! { this.isPhrase = true; } WORD ( WS! { $append("\\ "); } WORD
> )* (WS!)?
> '"'! { this.isPhrase = false; }
> ;
>
> protected WORD
> : (LETTER)+
> ;
>
> protected WS
> : (' ' | '\t')+
> ;
>
> protected LETTER
>     :   '\u0024' |
>         '\u0041'..'\u005a' |
>         '\u005f' |
>         '\u0061'..'\u007a' |
>         '\u00c0'..'\u00d6' |
>         '\u00d8'..'\u00f6' |
>         '\u00f8'..'\u00ff' |
>         '\u0100'..'\u1fff' |
>         '\u3040'..'\u318f' |
>         '\u3300'..'\u337f' |
>         '\u3400'..'\u3d2d' |
>         '\u4e00'..'\u9fff' |
>         '\uf900'..'\ufaff'
>     ;
>
> /***************** LEXER END **********************/
>
> I get these warnings:
>
> antlr:
>     [antlr] ANTLR Parser Generator   Version 2.7.2rc2 (20030105)
> 1989-2003 jGuru.com
>     [antlr] QueryParser.g: warning:lexical nondeterminism between
> rules TERM and AND upon
>     [antlr] QueryParser.g:     k==1:'A'
>     [antlr] QueryParser.g:     k==2:'N'
>     [antlr] QueryParser.g:     k==3:'D'
>     [antlr] QueryParser.g: warning:lexical nondeterminism between
> rules TERM and OR upon
>     [antlr] QueryParser.g:     k==1:'O'
>     [antlr] QueryParser.g:     k==2:'R'
>     [antlr] QueryParser.g:     k==3:<end-of-token>
>     [antlr] QueryParser.g: warning:lexical nondeterminism between
> rules TERM and NOT upon
>     [antlr] QueryParser.g:     k==1:'N'
>     [antlr] QueryParser.g:     k==2:'O'
>     [antlr] QueryParser.g:     k==3:'T'
>     [antlr] warning: public lexical rule TERM is optional (can match
> "nothing")
>     [antlr] QueryParser.g:75: warning:lexical nondeterminism upon
>     [antlr] QueryParser.g:75:     k==1:'\t',' '
>     [antlr] QueryParser.g:75:     k==2:'\t',' '
>     [antlr] QueryParser.g:75:     k==3:'\t',' ','"'
>     [antlr] QueryParser.g:75:     between alt 1 and exit branch of block
>
> +++++++++++++++++++++++
>
> I changed k to 3 in hope it would solve the nondeterminism but that
> changes basically nothing. It adds only the lines with k==2 and k==3
> to the warning output.
>
> I do understand that "AND", "OR", "NOT" can match as WORD, but I am not
> able to tell antlr to first try to match AND, OR, NOT and then WORD. I
> tried syntactic predicates in different places, but that didn't
> change anything.
>
> The last warning is annoying: where is the error in the WS rule? I
> can't find anything wrong with it.
>
> I would greatly appreciate any hint, tip, suggestion, solution...!
>
> regards,
> Chantal
>
>
>
>
> Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/
>
>
>


 

Your use of Yahoo! Groups is subject to http://docs.yahoo.com/info/terms/ 



More information about the antlr-interest mailing list