UNPKG

@nickgraffis/parse-address

Version:

US Street Address Parser. Forked from https://github.com/hassansin/parse-address.

775 lines (733 loc) 20.8 kB
// Copyright (c) 2014-2015, hassansin // //Perl Ref: http://cpansearch.perl.org/src/TIMB/Geo-StreetAddress-US-1.04/US.pm "use strict"; (function () { var root; root = this; var XRegExp; if (typeof require !== "undefined") { XRegExp = require('xregexp/src/xregexp.js'); } else XRegExp = root.XRegExp; var parser = {}; var Addr_Match = {}; var Directional = { north: "N", northeast: "NE", east: "E", southeast: "SE", south: "S", southwest: "SW", west: "W", northwest: "NW", }; var Street_Type = { allee: "aly", alley: "aly", ally: "aly", anex: "anx", annex: "anx", annx: "anx", arcade: "arc", av: "ave", aven: "ave", avenu: "ave", avenue: "ave", avn: "ave", avnue: "ave", bayoo: "byu", bayou: "byu", beach: "bch", bend: "bnd", bluf: "blf", bluff: "blf", bluffs: "blfs", bot: "btm", bottm: "btm", bottom: "btm", boul: "blvd", boulevard: "blvd", boulv: "blvd", branch: "br", brdge: "brg", bridge: "brg", brnch: "br", brook: "brk", brooks: "brks", burg: "bg", burgs: "bgs", bypa: "byp", bypas: "byp", bypass: "byp", byps: "byp", camp: "cp", canyn: "cyn", canyon: "cyn", cape: "cpe", causeway: "cswy", causway: "cswy", causwa: "cswy", cen: "ctr", cent: "ctr", center: "ctr", centers: "ctrs", centr: "ctr", centre: "ctr", circ: "cir", circl: "cir", circle: "cir", circles: "cirs", ck: "crk", cliff: "clf", cliffs: "clfs", club: "clb", cmp: "cp", cnter: "ctr", cntr: "ctr", cnyn: "cyn", common: "cmn", commons: "cmns", corner: "cor", corners: "cors", course: "crse", court: "ct", courts: "cts", cove: "cv", coves: "cvs", cr: "crk", crcl: "cir", crcle: "cir", crecent: "cres", creek: "crk", crescent: "cres", cresent: "cres", crest: "crst", crossing: "xing", crossroad: "xrd", crossroads: "xrds", crscnt: "cres", crsent: "cres", crsnt: "cres", crssing: "xing", crssng: "xing", crt: "ct", curve: "curv", dale: "dl", dam: "dm", div: "dv", divide: "dv", driv: "dr", drive: "dr", drives: "drs", drv: "dr", dvd: "dv", estate: "est", estates: "ests", exp: "expy", expr: "expy", express: "expy", expressway: "expy", expw: "expy", extension: "ext", extensions: "exts", extn: "ext", extnsn: "ext", fall: "fall", falls: "fls", ferry: "fry", field: "fld", fields: "flds", flat: "flt", flats: "flts", ford: "frd", fords: "frds", forest: "frst", forests: "frst", forg: "frg", forge: "frg", forges: "frgs", fork: "frk", forks: "frks", fort: "ft", freeway: "fwy", freewy: "fwy", frry: "fry", frt: "ft", frway: "fwy", frwy: "fwy", garden: "gdn", gardens: "gdns", gardn: "gdn", gateway: "gtwy", gatewy: "gtwy", gatway: "gtwy", glen: "gln", glens: "glns", grden: "gdn", grdn: "gdn", grdns: "gdns", green: "grn", greens: "grns", grov: "grv", grove: "grv", groves: "grvs", gtway: "gtwy", harb: "hbr", harbor: "hbr", harbors: "hbrs", harbr: "hbr", haven: "hvn", havn: "hvn", height: "hts", heights: "hts", hgts: "hts", highway: "hwy", highwy: "hwy", hill: "hl", hills: "hls", hiway: "hwy", hiwy: "hwy", hllw: "holw", hollow: "holw", hollows: "holw", holws: "holw", hrbor: "hbr", ht: "hts", hway: "hwy", inlet: "inlt", island: "is", islands: "iss", isles: "isle", islnd: "is", islnds: "iss", jction: "jct", jctn: "jct", jctns: "jcts", junction: "jct", junctions: "jcts", junctn: "jct", juncton: "jct", key: "ky", keys: "kys", knol: "knl", knoll: "knl", knolls: "knls", la: "ln", lake: "lk", lakes: "lks", land: "land", landing: "lndg", lane: "ln", lanes: "ln", ldge: "ldg", light: "lgt", lights: "lgts", lndng: "lndg", loaf: "lf", lock: "lck", locks: "lcks", lodg: "ldg", lodge: "ldg", loops: "loop", mall: "mall", manor: "mnr", manors: "mnrs", meadow: "mdw", meadows: "mdws", medows: "mdws", mews: "mews", mill: "ml", mills: "mls", mission: "msn", missn: "msn", mnt: "mt", mntain: "mtn", mntn: "mtn", mntns: "mtns", motorway: "mtwy", mount: "mt", mountain: "mtn", mountains: "mtns", mountin: "mtn", mssn: "msn", mtin: "mtn", neck: "nck", orchard: "orch", orchrd: "orch", overpass: "opas", ovl: "oval", parks: "park", parkway: "pkwy", parkways: "pkwy", parkwy: "pkwy", pass: "pass", passage: "psge", paths: "path", pikes: "pike", pine: "pne", pines: "pnes", pk: "park", pkway: "pkwy", pkwys: "pkwy", pky: "pkwy", place: "pl", plain: "pln", plaines: "plns", plains: "plns", plaza: "plz", plza: "plz", point: "pt", points: "pts", port: "prt", ports: "prts", prairie: "pr", prarie: "pr", prk: "park", prr: "pr", rad: "radl", radial: "radl", radiel: "radl", ranch: "rnch", ranches: "rnch", rapid: "rpd", rapids: "rpds", rdge: "rdg", rest: "rst", ridge: "rdg", ridges: "rdgs", river: "riv", rivr: "riv", rnchs: "rnch", road: "rd", roads: "rds", route: "rte", rvr: "riv", row: "row", rue: "rue", run: "run", shoal: "shl", shoals: "shls", shoar: "shr", shoars: "shrs", shore: "shr", shores: "shrs", skyway: "skwy", spng: "spg", spngs: "spgs", spring: "spg", springs: "spgs", sprng: "spg", sprngs: "spgs", spurs: "spur", sqr: "sq", sqre: "sq", sqrs: "sqs", squ: "sq", square: "sq", squares: "sqs", station: "sta", statn: "sta", stn: "sta", str: "st", strav: "stra", strave: "stra", straven: "stra", stravenue: "stra", stravn: "stra", stream: "strm", street: "st", streets: "sts", streme: "strm", strt: "st", strvn: "stra", strvnue: "stra", sumit: "smt", sumitt: "smt", summit: "smt", terr: "ter", terrace: "ter", throughway: "trwy", tpk: "tpke", tr: "trl", trace: "trce", traces: "trce", track: "trak", tracks: "trak", trafficway: "trfy", trail: "trl", trails: "trl", trk: "trak", trks: "trak", trls: "trl", trnpk: "tpke", trpk: "tpke", tunel: "tunl", tunls: "tunl", tunnel: "tunl", tunnels: "tunl", tunnl: "tunl", turnpike: "tpke", turnpk: "tpke", underpass: "upas", union: "un", unions: "uns", valley: "vly", valleys: "vlys", vally: "vly", vdct: "via", viadct: "via", viaduct: "via", view: "vw", views: "vws", vill: "vlg", villag: "vlg", village: "vlg", villages: "vlgs", ville: "vl", villg: "vlg", villiage: "vlg", vist: "vis", vista: "vis", vlly: "vly", vst: "vis", vsta: "vis", wall: "wall", walks: "walk", well: "wl", wells: "wls", wy: "way", }; var State_Code = { "alabama": "AL", "alaska": "AK", "american samoa": "AS", "arizona": "AZ", "arkansas": "AR", "california": "CA", "colorado": "CO", "connecticut": "CT", "delaware": "DE", "district of columbia": "DC", "federated states of micronesia": "FM", "florida": "FL", "georgia": "GA", "guam": "GU", "hawaii": "HI", "idaho": "ID", "illinois": "IL", "indiana": "IN", "iowa": "IA", "kansas": "KS", "kentucky": "KY", "louisiana": "LA", "maine": "ME", "marshall islands": "MH", "maryland": "MD", "massachusetts": "MA", "michigan": "MI", "minnesota": "MN", "mississippi": "MS", "missouri": "MO", "montana": "MT", "nebraska": "NE", "nevada": "NV", "new hampshire": "NH", "new jersey": "NJ", "new mexico": "NM", "new york": "NY", "north carolina": "NC", "north dakota": "ND", "northern mariana islands": "MP", "ohio": "OH", "oklahoma": "OK", "oregon": "OR", "palau": "PW", "pennsylvania": "PA", "puerto rico": "PR", "rhode island": "RI", "south carolina": "SC", "south dakota": "SD", "tennessee": "TN", "texas": "TX", "utah": "UT", "vermont": "VT", "virgin islands": "VI", "virginia": "VA", "washington": "WA", "west virginia": "WV", "wisconsin": "WI", "wyoming": "WY", }; var Direction_Code; var initialized = false; var Normalize_Map = { prefix: Directional, prefix1: Directional, prefix2: Directional, suffix: Directional, suffix1: Directional, suffix2: Directional, type: Street_Type, type1: Street_Type, type2: Street_Type, state: State_Code, } function capitalize(s) { return s && s[0].toUpperCase() + s.slice(1); } function keys(o) { return Object.keys(o); } function values(o) { var v = []; keys(o).forEach(function (k) { v.push(o[k]); }); return v; } function each(o, fn) { keys(o).forEach(function (k) { fn(o[k], k); }); } function invert(o) { var o1 = {}; keys(o).forEach(function (k) { o1[o[k]] = k; }); return o1; } function flatten(o) { return keys(o).concat(values(o)); } function lazyInit() { if (initialized) { return; } initialized = true; Direction_Code = invert(Directional); /* var Street_Type_Match = {}; each(Street_Type,function(v,k){ Street_Type_Match[v] = XRegExp.escape(v) }); each(Street_Type,function(v,k){ Street_Type_Match[v] = Street_Type_Match[v] + "|" + XRegExp.escape(k); }); each(Street_Type_Match,function(v,k){ Street_Type_Match[k] = new RegExp( '\\b(?:' + Street_Type_Match[k] + ')\\b', 'i') }); */ Addr_Match = { type: flatten(Street_Type).sort().filter(function (v, i, arr) { return arr.indexOf(v) === i }).join('|'), fraction: '\\d+\\/\\d+', state: '\\b(?:' + keys(State_Code).concat(values(State_Code)).map(XRegExp.escape).join('|') + ')\\b', direct: values(Directional).sort(function (a, b) { return a.length < b.length }).reduce(function (prev, curr) { return prev.concat([XRegExp.escape(curr.replace(/\w/g, '$&.')), curr]) }, keys(Directional)).join('|'), dircode: keys(Direction_Code).join("|"), zip: '(?<zip>\\d{5})[- ]?(?<plus4>\\d{4})?', corner: '(?:\\band\\b|\\bat\\b|&|\\@)', }; Addr_Match.number = '(?<number>(\\d+-?\\d*)|([N|S|E|W]\\d{1,3}[N|S|E|W]\\d{1,6}))(?=\\D)'; Addr_Match.street = ' \n\ (?: \n\ (?:(?<street_0>'+ Addr_Match.direct + ')\\W+ \n\ (?<type_0>'+ Addr_Match.type + ')\\b \n\ ) \n\ | \n\ (?:(?<prefix_0>'+ Addr_Match.direct + ')\\W+)? \n\ (?: \n\ (?<street_1>[^,]*\\d) \n\ (?:[^\\w,]*(?<suffix_1>'+ Addr_Match.direct + ')\\b) \n\ | \n\ (?<street_2>[^,]+) \n\ (?:[^\\w,]+(?<type_2>'+ Addr_Match.type + ')\\b) \n\ (?:[^\\w,]+(?<suffix_2>'+ Addr_Match.direct + ')\\b)? \n\ | \n\ (?<street_3>[^,]+?) \n\ (?:[^\\w,]+(?<type_3>'+ Addr_Match.type + ')\\b)? \n\ (?:[^\\w,]+(?<suffix_3>'+ Addr_Match.direct + ')\\b)? \n\ ) \n\ )'; Addr_Match.po_box = 'p\\W*(?:[om]|ost\\ ?office)\\W*b(?:ox)?' Addr_Match.sec_unit_type_numbered = ' \n\ (?<sec_unit_type_1>su?i?te \n\ |apt(?:a?r?t?me?n?t)? \n\ |apartment \n\ |unit \n\ |su \n\ |sui \n\ |floor \n\ |uni?t \n\ |'+ Addr_Match.po_box + ' \n\ |bu?i?ldi?n?g \n\ |lo?t \n\ |fl(?:oo)?r? \n\ |ro*m \n\ |box)(?![a-z] \n\ ) \n\ '; Addr_Match.sec_unit_type_unnumbered = ' \n\ (?<sec_unit_type_2>ba?se?me?n?t \n\ |fro?nt \n\ |lo?bby \n\ |lowe?r \n\ |off?i?ce? \n\ |pe?n?t?ho?u?s?e? \n\ |rear \n\ |side \n\ |uppe?r \n\ )\\b'; Addr_Match.sec_unit = ' \n\ (?: #fix3 \n\ (?: #fix1 \n\ (?: \n\ (?:'+ Addr_Match.sec_unit_type_numbered + '\\W*) \n\ |(?<sec_unit_type_3>\\#)\\W* \n\ ) \n\ (?<sec_unit_num_1>[\\w-]+) \n\ ) \n\ | \n\ '+ Addr_Match.sec_unit_type_unnumbered + ' \n\ )'; Addr_Match.city_and_state = ' \n\ (?: \n\ (?<city>[^\\d,]+?)\\W+ \n\ (?<state>'+ Addr_Match.state + ') \n\ ) \n\ '; Addr_Match.place = ' \n\ (?:'+ Addr_Match.city_and_state + '\\W*)? \n\ (?:'+ Addr_Match.zip + ')? \n\ '; Addr_Match.address = XRegExp(' \n\ ^ \n\ [^\\w\\#]* \n\ ('+ Addr_Match.number + ')\\W* \n\ (?:'+ Addr_Match.fraction + '\\W*)? \n\ '+ Addr_Match.street + '\\W+ \n\ (?:'+ Addr_Match.sec_unit + ')?\\W* #fix2 \n\ '+ Addr_Match.place + ' \n\ \\W*$', 'ix'); var sep = '(?:\\W+|$)'; // no support for \Z Addr_Match.informal_address = XRegExp(' \n\ ^ \n\ \\s* \n\ (?:'+ Addr_Match.sec_unit + sep + ')? \n\ (?:'+ Addr_Match.number + ')?\\W* \n\ (?:'+ Addr_Match.fraction + '\\W*)? \n\ '+ Addr_Match.street + sep + ' \n\ (?:'+ Addr_Match.sec_unit.replace(/_\d/g, '$&1') + sep + ')? \n\ (?:'+ Addr_Match.place + ')? \n\ ', 'ix'); Addr_Match.po_address = XRegExp(' \n\ ^ \n\ \\s* \n\ (?:'+ Addr_Match.sec_unit.replace(/_\d/g, '$&1') + sep + ')? \n\ (?:'+ Addr_Match.place + ')? \n\ ', 'ix'); Addr_Match.intersection = XRegExp(' \n\ ^\\W* \n\ '+ Addr_Match.street.replace(/_\d/g, '1$&') + '\\W*? \n\ \\s+'+ Addr_Match.corner + '\\s+ \n\ '+ Addr_Match.street.replace(/_\d/g, '2$&') + '($|\\W+) \n\ '+ Addr_Match.place + '\\W*$', 'ix'); } parser.normalize_address = function (parts) { lazyInit(); if (!parts) return null; var parsed = {}; Object.keys(parts).forEach(function (k) { if (['input', 'index'].indexOf(k) !== -1 || isFinite(k)) return; var key = isFinite(k.split('_').pop()) ? k.split('_').slice(0, -1).join('_') : k; if (parts[k]) parsed[key] = parts[k].trim().replace(/^\s+|\s+$|[^\w\s\-#&]/g, ''); }); // Normalize specific keys using the normalization maps each(Normalize_Map, function (map, key) { if (parsed[key] && map[parsed[key].toLowerCase()]) { parsed[key] = map[parsed[key].toLowerCase()]; } }); // Normalize apartment/unit types to proper capitalization ['type', 'type1', 'type2'].forEach(function (key) { if (key in parsed) parsed[key] = parsed[key].charAt(0).toUpperCase() + parsed[key].slice(1).toLowerCase(); }); // Handle apartment number normalization if (parsed.sec_unit_type_1 && (parsed.sec_unit_type_1.toLowerCase() === 'apt' || parsed.sec_unit_type_1.toLowerCase() === 'unit')) { parsed.sec_unit_type = capitalize(parsed.sec_unit_type_1); } // Handle PO Box number normalization if (parsed.sec_unit_type_1 && (parsed.sec_unit_type_1.toLowerCase() === 'po box')) { parsed.sec_unit_type = 'PO Box'; } // Handle city capitalization if (parsed.city) { parsed.city = XRegExp.replace(parsed.city, XRegExp('^(?<dircode>' + Addr_Match.dircode + ')\\s+(?=\\S)', 'ix'), function (match) { return capitalize(Direction_Code[match.dircode.toUpperCase()]) + ' '; } ); } return parsed; }; parser.parseAddress = function (address) { lazyInit(); var parts = XRegExp.exec(address, Addr_Match.address); return parser.normalize_address(parts); }; parser.parseInformalAddress = function (address) { lazyInit(); var parts = XRegExp.exec(address, Addr_Match.informal_address); return parser.normalize_address(parts); }; parser.parsePoAddress = function (address) { lazyInit(); var parts = XRegExp.exec(address, Addr_Match.po_address); return parser.normalize_address(parts); }; parser.parseLocation = function (address) { lazyInit(); // Check for PO Box address if (XRegExp('^' + Addr_Match.po_box, 'xi').test(address)) { return parser.parsePoAddress(address); } // Check for intersections if (XRegExp(Addr_Match.corner, 'xi').test(address)) { return parser.parseIntersection(address); } // Fallback to parsing regular or informal address return parser.parseAddress(address) || parser.parseInformalAddress(address); }; parser.parseIntersection = function (address) { lazyInit(); var parts = XRegExp.exec(address, Addr_Match.intersection); parts = parser.normalize_address(parts); if (parts) { parts.type2 = parts.type2 || ''; parts.type1 = parts.type1 || ''; if (parts.type2 && !parts.type1 || (parts.type1 === parts.type2)) { var type = parts.type2; type = XRegExp.replace(type, /s\W*$/, ''); if (XRegExp('^' + Addr_Match.type + '$', 'ix').test(type)) { parts.type1 = parts.type2 = type; } } } return parts; }; // AMD / RequireJS if (typeof define !== 'undefined' && define.amd) { define([], function () { return parser; }); } // Node.js else if (typeof exports !== "undefined") { exports.parseIntersection = parser.parseIntersection; exports.parseLocation = parser.parseLocation; exports.parseInformalAddress = parser.parseInformalAddress; exports.parseAddress = parser.parseAddress; } // included directly via <script> tag else { root.addressParser = root.addressParser || parser; } }());