// addressit
// Version:
// Freeform Street Address Parser
// 130 lines (115 loc) • 4.34 kB
// JavaScript
/* jshint node: true */
;
var Address = require('../address');
var compiler = require('./compiler');
// Initialise the street regexes.
// These are the regexes for determining whether or not a string is a street.
// It is important to note that they are passed through the reStreetCleaner
// regex to become more strict.
// this list has been sourced from:
// https://www.propertyassist.sa.gov.au/pa/qhelp.phtml?cmd=streettype
//
// __NOTE:__ Some of the street types have been disabled due to collisions
// with common parts of suburb names. At some point the street parser may be
// improved to deal with these cases, but for now this has been deemed
// suitable.
// Street-type patterns handed to the compiler, one pattern per street type
// (full name plus its abbreviations). Entries that are commented out are
// deliberately disabled street types.
//
// Any pattern that contains a top-level alternation (`A|B`) MUST be wrapped in
// a group: `|` has the lowest precedence in a regular expression, so anything
// the compiler adds before or after an ungrouped pattern would apply to only
// one of the alternatives.
var streetRegexes = compiler([
  'ALLE?Y',                 // ALLEY / ALLY
  'APP(ROACH)?',            // APPROACH / APP
  'ARC(ADE)?',              // ARCADE / ARC
  'AV(E|ENUE)?',            // AVENUE / AV / AVE
  '(BOULEVARD|BLVD)',       // BOULEVARD / BLVD
  'BROW',                   // BROW
  'BYPA(SS)?',              // BYPASS / BYPA
  'C(AUSE)?WAY',            // CAUSEWAY / CWAY
  '(CIRCUIT|CCT)',          // CIRCUIT / CCT
  'CIRC(US)?',              // CIRCUS / CIRC
  'CL(OSE)?',               // CLOSE / CL
  'CO?PSE',                 // COPSE / CPSE
  '(CORNER|CNR)',           // CORNER / CNR
  // 'COVE',                // COVE
  '(C((OUR)|R)?T|CRT)',     // COURT / CT / CRT
  'CRES(CENT)?',            // CRESCENT / CRES
  'DR(IVE)?',               // DRIVE / DR
  // 'END',                 // END
  'ESP(LANADE)?',           // ESPLANADE / ESP
  // 'FLAT',                // FLAT
  'F(REE)?WAY',             // FREEWAY / FWAY
  '(FRONTAGE|FRNT)',        // FRONTAGE / FRNT
  // '(GARDENS|GDNS)',      // GARDENS / GDNS
  '(GLADE|GLD)',            // GLADE / GLD
  // 'GLEN',                // GLEN
  'GR(EE)?N',               // GREEN / GRN
  // 'GR(OVE)?',            // GROVE / GR
  // 'H(EIGH)?TS',          // HEIGHTS / HTS
  '(HIGHWAY|HWY)',          // HIGHWAY / HWY
  '(LANE|LN)',              // LANE / LN
  'LINK',                   // LINK
  'LOOP',                   // LOOP
  'MALL',                   // MALL
  'MEWS',                   // MEWS
  '(PACKET|PCKT)',          // PACKET / PCKT
  'P(ARA)?DE',              // PARADE / PDE
  // 'PARK',                // PARK
  '(PARKWAY|PKWY)',         // PARKWAY / PKWY
  'PL(ACE)?',               // PLACE / PL
  'PROM(ENADE)?',           // PROMENADE / PROM
  'RES(ERVE)?',             // RESERVE / RES
  // 'RI?DGE',              // RIDGE / RDGE
  'RISE',                   // RISE
  'R(OA)?D',                // ROAD / RD
  'ROW',                    // ROW
  'SQ(UARE)?',              // SQUARE / SQ
  'ST(REET)?',              // STREET / ST
  'STRI?P',                 // STRIP / STRP
  'TARN',                   // TARN
  // grouped so both alternatives are treated as one unit by the compiler,
  // like every other alternation in this list
  '(T(ERRA)?CE|TER?R)',     // TERRACE / TER / TERR / TCE
  '(THOROUGHFARE|TFRE)',    // THOROUGHFARE / TFRE
  'TRACK?',                 // TRACK / TRAC
  'TR(AI)?L',               // TRAIL / TRL
  'T(RUNK)?WAY',            // TRUNKWAY / TWAY
  // 'VIEW',                // VIEW
  'VI?STA',                 // VISTA / VSTA
  'WALK',                   // WALK
  'WA?Y',                   // WAY / WY
  'W(ALK)?WAY',             // WALKWAY / WWAY
  'YARD',                   // YARD
  'BROADWAY'                // BROADWAY
]);
// Matches a token that consists solely of a compass direction (N, NTH, NORTH,
// E, EST, EAST, S, STH, SOUTH, W, WST, WEST) immediately followed by a comma,
// case-insensitively. It is passed to extractStreet as its second argument;
// presumably it marks where a street ends when a direction suffix is followed
// by a comma — TODO confirm against Address#extractStreet.
var reSplitStreet = /^(N|NTH|NORTH|E|EST|EAST|S|STH|SOUTH|W|WST|WEST)\,$/i;
// The last item returned by the compiler for the single pattern 'BROADWAY'.
// It is passed to extractStreet as its third argument; presumably it
// identifies street names that carry no separate street-type word — TODO
// confirm against Address#extractStreet.
var reNoStreet = compiler(['BROADWAY']).pop();
module.exports = function(text, opts) {
var address = new Address(text, opts);
// clean the address
address
.clean([
// remove trailing dots from two letter abbreviations
function(input) {
return input.replace(/(\w{2})\./g, '$1');
},
// convert shop to a unit format
function(input) {
return input.replace(/^\s*SHOP\s?(\d*)\,?\s*/i, '$1/');
}
])
// split the address
.split(/\s/)
// extract the unit
.extract('unit', [
(/^(?:\#|APT|APARTMENT)\s?(\d+)/),
(/^(\d+)\/(.*)/)
])
// extract the street
.extractStreet(streetRegexes, reSplitStreet, reNoStreet);
if (opts && opts.state) {
address.extract('state', opts.state );
}
if (opts && opts.country) {
address.extract('country', opts.country );
}
if (opts && opts.rePostalCode) {
address.extract('postalcode', [ opts.rePostalCode ]);
}
// take remaining unknown parts and push them
return address.finalize();
};