[antlr-interest] Need help with an address parser
Jeff Bergman
jbergmanster at gmail.com
Thu Jun 22 13:55:18 PDT 2006
I am trying to build an address parser but I am having problems. The
grammar seems to choke on my streetDesignator and streetDesignatorLong in
the Parser section. I tried putting these in the lexer but fared no
better. I am not sure how to handle these. I need them to be separated
because I have to eventually convert the long version to the abbreviated
designators when parsing the address.
options {
language = "CSharp";
}
class AddressParser extends Parser;
options {
buildAST = true; // uses CommonAST by default
}
address
: poBox | simpleStreetAddress
;
poBox: "PO" boxDesignator NUMBER
;
simpleStreetAddress: NUMBER (IDENT | streetDesignatorLong |
directionDesignator)+ (streetDesignator | streetDesignatorLong )
(directionDesignator)? (unitDesignator (NUMBER | IDENT)? )?
;
boxDesignator: "BOX";
unitDesignator :
("APT" |
"BASE" |
"BLDG" |
"BSMT" |
"DEPT" |
"FL" |
"FRNT" |
"HNGR" |
"LBBY" |
"LOT" |
"LOWR" |
"OFC" |
"PH" |
"PIER" |
"POB" |
"REAR" |
"RM" |
"SIDE" |
"SLIP" |
"SPC" |
"STE" |
"STOP" |
"TRLR" |
"UNIT" |
"UPPR")
;
directionDesignator:
("E" |
"N" |
"NE" |
"NW" |
"S" |
"SE" |
"SW" |
"W")
;
streetDesignator:
"ALY" |
"ANX" |
"ARC" |
"AVE" |
"BCH" |
"BG" |
"BGS" |
"BLF" |
"BLFS" |
"BLVD" |
"BND" |
"BR" |
"BRG" |
"BRK" |
"BRKS" |
"BTM" |
"BYP" |
"BYU" |
"CIR" |
"CIRS" |
"CLB" |
"CLF" |
"CLFS" |
"CMN" |
"COR" |
"CORS" |
"CP" |
"CPE" |
"CRES" |
"CRK" |
"CRSE" |
"CRST" |
"CSWY" |
"CT" |
"CTR" |
"CTRS" |
"CTS" |
"CURV" |
"CV" |
"CVS" |
"CYN" |
"DL" |
"DM" |
"DR" |
"DRS" |
"DV" |
"EST" |
"ESTS" |
"EXPY" |
"EXT" |
"EXTS" |
"FALL" |
"FLD" |
"FLDS" |
"FLS" |
"FLT" |
"FLTS" |
"FRD" |
"FRDS" |
"FRG" |
"FRGS" |
"FRK" |
"FRKS" |
"FRST" |
"FRY" |
"FT" |
"FWY" |
"GDN" |
"GDNS" |
"GLN" |
"GLNS" |
"GRN" |
"GRNS" |
"GRV" |
"GRVS" |
"GTWY" |
"HBR" |
"HBRS" |
"HL" |
"HLS" |
"HOLW" |
"HTS" |
"HVN" |
"HWY" |
"INLT" |
"IS" |
"ISLE" |
"ISS" |
"JCT" |
"JCTS" |
"KNL" |
"KNLS" |
"KY" |
"KYS" |
"LAND" |
"LCK" |
"LCKS" |
"LDG" |
"LF" |
"LGT" |
"LGTS" |
"LK" |
"LKS" |
"LN" |
"LNDG" |
"LOOP" |
"MALL" |
"MDW" |
"MDWS" |
"MEWS" |
"ML" |
"MLS" |
"MNR" |
"MNRS" |
"MSN" |
"MT" |
"MTN" |
"MTNS" |
"MTWY" |
"NCK" |
"OPAS" |
"ORCH" |
"OVAL" |
"PARK" |
"PASS" |
"PATH" |
"PIKE" |
"PKWY" |
"PL" |
"PLN" |
"PLNS" |
"PLZ" |
"PNE" |
"PNES" |
"PR" |
"PRT" |
"PRTS" |
"PSGE" |
"PT" |
"PTS" |
"RADL" |
"RAMP" |
"RD" |
"RDG" |
"RDGS" |
"RDS" |
"RIV" |
"RNCH" |
"ROW" |
"RPD" |
"RPDS" |
"RR" |
"RST" |
"RTE" |
"RUE" |
"RUN" |
"SHL" |
"SHLS" |
"SHR" |
"SHRS" |
"SKWY" |
"SMT" |
"SPG" |
"SPGS" |
"SPUR" |
"SQ" |
"SQS" |
"ST" |
"STA" |
"STRA" |
"STRM" |
"STS" |
"TER" |
"TPKE" |
"TRAK" |
"TRCE" |
"TRFY" |
"TRL" |
"TRWY" |
"TUNL" |
"UN" |
"UNS" |
"UPAS" |
"VIA" |
"VIS" |
"VL" |
"VLG" |
"VLGS" |
"VLY" |
"VLYS" |
"VW" |
"VWS" |
"WALK" |
"WALL" |
"WAY" |
"WAYS" |
"WL" |
"WLS" |
"XING" |
"XRD" |
;
streetDesignatorLong:
"ALLEY" |
"ANNEX" |
"ARCADE" |
"AVENUE" |
"BEACH" |
"BURG" |
"BURGS" |
"BLUFF" |
"BLUFFS" |
"BOULEVARD" |
"BEND" |
"BRANCH" |
"BRIDGE" |
"BROOK" |
"BROOKS" |
"BOTTOM" |
"BYPASS" |
"BAYOU" |
"BAYOO" |
"CIRCLE" |
"CIRCLES" |
"CLUB" |
"CLIFF" |
"CLIFFS" |
"COMMON" |
"CORNER" |
"CORNERS" |
"CAMP" |
"CAPE" |
"CRESCENT" |
"CREEK" |
"COURSE" |
"CREST" |
"CAUSEWAY" |
"COURT" |
"CENTER" |
"CENTERS" |
"COURTS" |
"CURVE" |
"COVE" |
"COVES" |
"CANYON" |
"DALE" |
"DAM" |
"DRIVE" |
"DRIVES" |
"DIVIDE" |
"ESTATE" |
"ESTATES" |
"EXPRESSWAY" |
"EXTENSION" |
"EXTENSIONS" |
"FIELD" |
"FIELDS" |
"FALLS" |
"FLAT" |
"FLATS" |
"FORD" |
"FORDS" |
"FORGE" |
"FORGES" |
"FORK" |
"FORKS" |
"FOREST" |
"FERRY" |
"FORT" |
"FREEWAY" |
"GARDEN" |
"GARDENS" |
"GLEN" |
"GLENS" |
"GREEN" |
"GREENS" |
"GROVE" |
"GROVES" |
"GATEWAY" |
"HARBOR" |
"HARBORS" |
"HILL" |
"HILLS" |
"HOLLOW" |
"HEIGHTS" |
"HAVEN" |
"HIGHWAY" |
"INLET" |
"ISLAND" |
"ISLANDS" |
"JUNCTION" |
"JUNCTIONS" |
"KNOLL" |
"KNOLLS" |
"KEY" |
"KEYS" |
"LOCK" |
"LOCKS" |
"LODGE" |
"LOAF" |
"LIGHT" |
"LIGHTS" |
"LAKE" |
"LAKES" |
"LANE" |
"LANDING" |
"MEADOW" |
"MEADOWS" |
"MILL" |
"MILLS" |
"MANOR" |
"MANORS" |
"MISSION" |
"MOUNT" |
"MOUNTAIN" |
"MOUNTAINS" |
"MOTORWAY" |
"NECK" |
"OVERPASS" |
"ORCHARD" |
"PARKS" |
"PARKWAY" |
"PARKWAYS" |
"PLACE" |
"PLAIN" |
"PLAINS" |
"PLAZA" |
"PINE" |
"PINES" |
"PRAIRIE" |
"PORT" |
"PORTS" |
"PASSAGE" |
"POINT" |
"POINTS" |
"RADIAL" |
"ROAD" |
"RIDGE" |
"RIDGES" |
"ROADS" |
"RIVER" |
"RANCH" |
"RAPID" |
"RAPIDS" |
"RURAL ROUTE" |
"REST" |
"ROUTE" |
"SHOAL" |
"SHOALS" |
"SHORE" |
"SHORES" |
"SKYWAY" |
"SUMMIT" |
"SPRING" |
"SPRINGS" |
"SPUR(S)" |
"SQUARE" |
"SQUARES" |
"STREET" |
"STATION" |
"STRAVENUE" |
"STREAM" |
"STREETS" |
"TERRACE" |
"TURNPIKE" |
"TRACK" |
"TRACE" |
"TRAFFICWAY" |
"TRAIL" |
"THROUGHWAY" |
"TUNNEL" |
"UNION" |
"UNIONS" |
"UNDERPASS" |
"VIADUCT" |
"VISTA" |
"VILLE" |
"VILLAGE" |
"VILLAGES" |
"VALLEY" |
"VALLEYS" |
"VIEW" |
"VIEWS" |
"WELL" |
"WELLS" |
"CROSSING"
;
class AddressLexer extends Lexer;
options {
k=10;
testLiterals=false;
}
tokens
{
ADDRESSDESIGNATORLONG;
}
WS : (' '
| '\t'
| '\f'
| '\n'
| '\r')
{ _ttype = Token.SKIP; }
;
protected
DIGIT
: '0'..'9'
;
protected
CHAR
: 'A'..'Z'
;
NUMBER : (DIGIT)+ ('/' (DIGIT)+ )? ( ('-' ( (CHAR)+ | (DIGIT)+ ) )
| (CHAR {$setType(IDENT);} )* )
;
IDENT
options {testLiterals=true;}
: (CHAR) ( CHAR |'-'|'0'..'9')*
;
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.antlr.org/pipermail/antlr-interest/attachments/20060622/97346c11/attachment-0001.html
More information about the antlr-interest
mailing list