[antlr-interest] How to ignore tokens during lexing

James Robson james.robson at ymail.com
Mon Apr 27 22:35:59 PDT 2009


Hi,

I've put together a grammar to parse a URN.

In the grammar I have some tokens defined; however, while processing one of the parser rules for an arbitrary string, it exits the parse and attempts to match one of the tokens based on the first two letters. This happens regardless of the lookahead specified, or whether I split out the grammar and turn on filters.

As an example, one of the tokens defined is "metadata" - this is defined in the tokens section.

While parsing the URN and parsing the parameters section, it runs into "myname=james", which should match name EQUALS value. Instead it attempts to match 'me=' against metadata and fails, and gives me "mynajames" instead as the result of the name EQUALS value rule.

Having the tokens defined as tokens or lexer rules makes no difference, not that I really expected it to.

Maybe I'm taking a totally wrong approach here; can anyone provide some advice?

Thanks in advance

Below are the parser and lexer.


// Combined grammar for RestTalk URIs. Token rules are pulled in from the
// lexer grammar RestTalkLex16 via the import below.
grammar RestTalk;

options {
    backtrack = true;   // grammar-wide backtracking
    memoize   = true;   // grammar-wide memoization
}

import RestTalkLex16;



// Top-level rule. Sequence binds tighter than '|', so the optional
// '?' resttalkQuery suffix belongs to the restalkPath alternative only.
uri
    :   webspacePath
    |   restalkPath ( '?' resttalkQuery )?
    ;
    
// PATHSEP, then WEBSPACE or XCLIENT, then PATHSEP and a webspaceOrigin.
webspacePath
    :   PATHSEP
        ( WEBSPACE | XCLIENT )
        PATHSEP
        webspaceOrigin
    ;
    
// Either a lone PATHSEP, or an optional group: PATHSEP followed by an
// application or by an objResourceId (itself optionally followed by
// PATHSEP graph). Because the group is optional, the second alternative
// can also match nothing.
restalkPath
    :   PATHSEP
    |   ( PATHSEP
          ( application
          | objResourceId ( PATHSEP graph )?
          )
        )?
    ;
    
// An applicationNamespace, optionally followed by PATHSEP and either an
// origin or an objResourceId (the latter optionally with PATHSEP graph).
application
    :   applicationNamespace
        ( PATHSEP
          ( origin
          | objResourceId ( PATHSEP graph )?
          )
        )?
    ;

// Parser-level wrapper around the ORI token.
objResourceId
    :   ORI
    ;

    
// One of the namespace keyword tokens.
applicationNamespace
    :   DOCSEARCH
    |   SEARCH
    |   WEBSEARCH
    |   XCLIENT
    ;

// graph, PATHSEP, objResourceId, then zero or more PATHSEP-separated names.
webspaceOrigin
    :   graph
        PATHSEP
        objResourceId
        ( PATHSEP name )*
    ;

// A rootNode followed by zero or more PATHSEP-separated names.
origin
    :   rootNode ( PATHSEP name )*
    ;
    
    
// A name, optionally prefixed with TILDE.
rootNode
    :   name
    |   TILDE name
    ;
    
// An identifier assembled from single-character tokens: a letter or '_'
// first, then any run of letters, digits, '_' or '.'. The syntactic
// predicate lets the rule proceed only when the next token is not METADATA.
name
options { greedy = true; }
    :   ( ~( METADATA ) )=>
        ( LOWERALPHA | UPPERALPHA | '_' )
        ( LOWERALPHA | UPPERALPHA | DIGIT | '_' | '.' )*
    ;
    
// GRAPHS, PATHSEP, then one of the graph-kind keyword tokens.
graph
    :   GRAPHS
        PATHSEP
        ( FOLDERS
        | RECORDSBYFOLDER
        | PHYSICALBYFOLDER
        | PHYSICALBYREPOSITORY
        | KEYWORDS
        )
    ;

// One of the three query forms, optionally followed by extra parameters.
resttalkQuery
    :   ( textQuery
        | metadataQuery
        | plusmetadataQuery
        )
        ( remainder )?
    ;
    
// TEXT EQUALS searchTerm, then any number of '&'-prefixed standardParams
// groups, then optionally one '&'-prefixed metadata query of either form.
textQuery
    :   TEXT EQUALS searchTerm
        ( '&' standardParams )*
        ( '&' ( metadataQuery | plusmetadataQuery ) )?
    ;

// One or more pchar, either bare or wrapped in double quotes.
searchTerm
    :   ( pchar )+
    |   '"' pchar+ '"'
    ;
    
// METADATA EQUALS expression, optionally followed by '&' standardParams.
// The METADATA token is referenced by name (as the name rule and textQuery
// do for their keywords) rather than by the literal 'metadata', so this rule
// does not depend on the literal being aliased to the lexer rule that lives
// in the imported lexer grammar.
metadataQuery
    :   METADATA EQUALS expression
        ( '&' standardParams )?
    ;

// PLUSMETADATA EQUALS expression, optionally followed by '&' standardParams.
// The PLUSMETADATA token is referenced by name rather than by the literal
// '+metadata', matching how other keyword tokens (e.g. TEXT) are used and
// avoiding reliance on the literal being aliased to the lexer rule that
// lives in the imported lexer grammar.
plusmetadataQuery
    :   PLUSMETADATA EQUALS expression
        ( '&' standardParams )?
    ;

// An '&'-separated list of one or more standardParam entries.
// The trailing group is repeated with '*'; without it the rule would accept
// exactly two parameters and reject a single one (e.g. "depth=all").
standardParams
    :   standardParam ( '&' standardParam )*
    ;
    
// A single keyword parameter: KEYWORD EQUALS value, where the shape of the
// value depends on the keyword.
standardParam
    :   BEHAVIOURS  EQUALS commaSeparatedList
    |   DEPTH       EQUALS ( ALL | DIGIT+ )
    |   DISPLAY     EQUALS commaSeparatedList
    |   NUMRESULTS  EQUALS DIGIT+
    |   OBJECTTYPES EQUALS commaSeparatedList
    |   PAGESIZE    EQUALS DIGIT+
    |   SCOPE       EQUALS commaSeparatedList
    |   SORTBY      EQUALS name
    |   SORTORDER   EQUALS name
    |   SYNTAX      EQUALS name
    |   ZONES       EQUALS name
    ;
    
// Two or more comma-separated pchar runs, either bare or wrapped in
// double quotes.
commaSeparatedList
    :   pchar+ ( ',' pchar+ )+
    |   '"' pchar+ ( ',' pchar+ )+ '"'
    ;
    
// One or more '&'-prefixed remainderParam entries.
remainder
    :   ( '&' remainderParam )+
    ;

// A free-form "name=value" pair. Backtracking and memoization are switched
// on explicitly at rule level.
remainderParam
options {
    backtrack = true;
    memoize   = true;
}
    :   paramName
        EQUALS
        paramValue
    ;

// Parameter names follow the name rule.
paramName
    :   name
    ;
    
// Parameter values are one or more pchar.
paramValue
    :   pchar+
    ;
    
// A condition followed by zero or more "logicalOperator ' ' condition"
// groups. No space is matched between a condition and the logicalOperator
// that follows it.
expression
    :   condition
        ( logicalOperator ' ' condition )*
    ;
// name, operator and fieldValue, separated by single spaces.
condition
    :   name
        ' ' operator
        ' ' fieldValue
    ;
    
// Either a double-quoted run of unreserved characters and spaces, or a bare
// run of unreserved characters.
fieldValue
    :   ( '"' ( unreserved | ' ' )+ '"'
        | unreserved+
        )
    ;

// Either a single alphabetic word, or a double-quoted phrase of two or more
// space-separated alphabetic words.
operator
    :   ( UPPERALPHA | LOWERALPHA )+
    |   ( '"'
          ( UPPERALPHA | LOWERALPHA )+
          ( ' ' ( UPPERALPHA | LOWERALPHA )+ )+
          '"'
        )
    ;

// An unreserved character or a percent-encoded triplet.
pcharns
    :   unreserved
    |   PCTENCODED
    ;
    
// pcharns extended with SUBDELIMS, ':' and '@'.
pchar
    :   pcharns
    |   SUBDELIMS
    |   ':'
    |   '@'
    ;

// A letter, a digit, or one of '-', '.', '_', '~'.
unreserved
    :   UPPERALPHA
    |   LOWERALPHA
    |   DIGIT
    |   '-'
    |   '.'
    |   '_'
    |   '~'
    ;
    
// "and" / "or" in either all-lowercase or all-uppercase form.
logicalOperator
    :   ANDLOWER
    |   ANDUPPER
    |   ORLOWER
    |   ORUPPER
    ;


// Lexer grammar holding the token rules for RestTalk.
lexer grammar RestTalkLex16;

options {
    filter = true;  // ANTLR filter mode is on
    k      = 20;    // lookahead depth
}

// Fixed-text tokens. The rule order below is unchanged from the original.

// Separators
EQUALS               : '=' ;
PATHSEP              : '/' ;

// Path keywords
XCLIENT              : 'xclient' ;
METADATA             : 'metadata' ;
PLUSMETADATA         : '+metadata' ;
WEBSEARCH            : 'websearch' ;
WEBSPACE             : 'webspace' ;
DOCSEARCH            : 'docsearch' ;
SEARCH               : 'search' ;
SERVICES             : 'services' ;
GRAPHS               : 'graphs' ;
FOLDERS              : 'folders' ;
RECORDSBYFOLDER      : 'recordsbyfolder' ;
PHYSICALBYFOLDER     : 'physicalbyfolder' ;
PHYSICALBYREPOSITORY : 'physicalbyrepository' ;
KEYWORDS             : 'keywords' ;
DOCS                 : 'docs' ;
GLOBALFOLDER         : 'ObjectiveGlobalFolder' ;
TILDE                : '~' ;
IDPREFIX             : 'id:' ;

// Logical operators and the "all" keyword
ANDLOWER             : 'and' ;
ORLOWER              : 'or' ;
ANDUPPER             : 'AND' ;
ORUPPER              : 'OR' ;
ALL                  : 'all' ;

// Query parameter keywords
TEXT                 : 'text' ;
BEHAVIOURS           : 'behaviours' ;
DEPTH                : 'depth' ;
DISPLAY              : 'display' ;
NUMRESULTS           : 'numresults' ;
OBJECTTYPES          : 'objecttypes' ;
PAGESIZE             : 'pagesize' ;
SCOPE                : 'scope' ;
SORTBY               : 'sortby' ;
SORTORDER            : 'sortorder' ;
SYNTAX               : 'syntax' ;
ZONES                : 'zones' ;
    
// IDPREFIX, then any number of lowercase letters, at least one uppercase
// letter, and at least one digit.
ORI
    :   IDPREFIX
        ( LOWERALPHA )*
        ( UPPERALPHA )+
        DIGIT+
    ;
    
// A percent-encoded triplet: '%' followed by two hexadecimal digits
// (upper or lower case).
// The second hex digit previously used 'a'...'f' (three dots), which is not
// the range operator; it is now 'a'..'f', matching the first hex digit.
PCTENCODED
    :   '%'
        ( DIGIT | 'A'..'F' | 'a'..'f' )
        ( DIGIT | 'A'..'F' | 'a'..'f' )
    ;

// Single-character class tokens.

// One decimal digit.
DIGIT
    :   '0'..'9'
    ;

// One lowercase ASCII letter.
LOWERALPHA
    :   'a'..'z'
    ;

// One uppercase ASCII letter.
UPPERALPHA
    :   'A'..'Z'
    ;
    
// One delimiter character from the set below (the same characters as the
// RFC 3986 "sub-delims" production).
SUBDELIMS
    :   ( '!'
        | '$'
        | '&'
        | '\''
        | '('
        | ')'
        | '*'
        | '+'
        | ','
        | ';'
        | '='
        )
    ;


      Enjoy a better web experience. Upgrade to the new Internet Explorer 8 optimised for Yahoo!7. Get it now.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.antlr.org/pipermail/antlr-interest/attachments/20090427/c4e1033d/attachment.html 


More information about the antlr-interest mailing list