UNPKG

pelias-openaddresses

Version:

Pelias import pipeline for OpenAddresses.

284 lines (245 loc) 11.4 kB
/**
 * Unit tests for the `streetName` analyzer exported by ../lib/cleanup_v2.
 *
 * Every test feeds a raw street name string to the analyzer and compares the
 * returned string against the expected normalised form. Tests are grouped by
 * concern: letter casing, 'generic' expansion (St -> Street), 'directional'
 * expansion (N -> North), diagonal contraction (Northeast -> NE), ordinals
 * (26 -> 26th) and inputs which must pass through unchanged.
 */
const tape = require('tape');
const analyzer = require('../lib/cleanup_v2').streetName;

tape('analyzer', (t) => {
  t.equal(typeof analyzer, 'function', 'analyzer is a function');
  t.equal(analyzer.length, 1, 'analyzer accepts body');
  t.end();
});

// --- Letter Casing ---

// fix casing on uppercased tokens
tape('casing - fix uppercased tokens', (t) => {
  t.equal(analyzer('MAIN STREET'), 'Main Street');
  t.equal(analyzer('DR M L KING JR BOULEVARD'), 'Dr M L King Jr Boulevard');

  // uppercase tokens ending with a period
  t.equal(analyzer('DR MLK. JR. BOULEVARD'), 'Dr MLK. JR. Boulevard');
  t.end();
});

// fix casing on lowercased tokens
tape('casing - fix lowercased tokens', (t) => {
  t.equal(analyzer('main street'), 'Main Street');
  t.equal(analyzer('dr m l king jr boulevard'), 'Dr M L King Jr Boulevard');

  // uppercase tokens ending with a period
  t.equal(analyzer('dr mlk. jr. boulevard'), 'Dr MLK. JR. Boulevard');
  t.end();
});

// ignore casing on mixedcase tokens
tape('casing - ignore casing on mixedcase tokens', (t) => {
  t.equal(analyzer('Willie Mc Donald Way'), 'Willie Mc Donald Way');
  t.equal(analyzer('McCallister Street'), 'McCallister Street');
  t.equal(analyzer('Mc Callister Street'), 'Mc Callister Street');
  t.end();
});

// --- Expanding the 'generic' part of the street name ---

// expand contracted 'generic' term
tape('generic expansion - final token position', (t) => {
  t.equal(analyzer('10 main street'), '10 Main Street');
  t.equal(analyzer('10 main St.'), '10 Main Street');
  t.equal(analyzer('10 main st.'), '10 Main Street');
  t.equal(analyzer('10 main str'), '10 Main Street');
  t.equal(analyzer('10 main st'), '10 Main Street');

  t.equal(analyzer('10 main road'), '10 Main Road');
  t.equal(analyzer('10 main Rd.'), '10 Main Road');
  t.equal(analyzer('10 main rd.'), '10 Main Road');
  t.equal(analyzer('10 main rd'), '10 Main Road');

  // note: this group used to be listed twice verbatim; the exact duplicate
  // assertions were dropped as they exercised nothing new.
  t.equal(analyzer('10 main avenue'), '10 Main Avenue');
  t.equal(analyzer('10 main Ave.'), '10 Main Avenue');
  t.equal(analyzer('10 main ave.'), '10 Main Avenue');
  t.equal(analyzer('10 main ave'), '10 Main Avenue');
  t.end();
});

// do not expand 'generic' term when not in final token position
tape('generic expansion - not final token position', (t) => {
  t.equal(analyzer('10 main st st'), '10 Main St Street');
  t.equal(analyzer('10 main st junction'), '10 Main St Junction');
  t.equal(analyzer('AVE ST RD ST PKWY ST'), 'Ave St Rd St Pkwy Street');
  t.end();
});

// we should expand the 'generic' when directly before a directional
tape('generic expansion - before directionals', (t) => {
  t.equal(analyzer('Main St N'), 'Main Street North');
  t.equal(analyzer('Main St S'), 'Main Street South');
  t.equal(analyzer('Main St E'), 'Main Street East');
  t.equal(analyzer('Main St W'), 'Main Street West');
  t.equal(analyzer('Main St North'), 'Main Street North');
  t.equal(analyzer('Main St South'), 'Main Street South');
  t.equal(analyzer('Main St East'), 'Main Street East');
  t.equal(analyzer('Main St West'), 'Main Street West');
  t.end();
});

// do not expand a 'generic' term when there is only one token
// this is logical as the 'generic' should always be paired with
// a 'specific'.
// note: this is likely not necessary but adds a little more safety
// feel free to remove this restriction later if it doesn't make sense.
tape('generic expansion - single token', (t) => {
  t.equal(analyzer('st'), 'St');
  t.equal(analyzer('espl'), 'Espl');
  t.end();
});

// @todo: what should we do when there are multiple 'generic' tokens?
tape('generic expansion - multiple generic tokens', (t) => {
  t.equal(analyzer('W FARMS SQ PLZ'), 'West Farms Sq Plaza');
  t.end();
});

// @todo: what should we do when the 'generic' precedes the 'specific'?
// @note: currently this expands 'Ave S' but not 'Ave X' because it thinks
// that S refers to a directional.
// (test name made unique: it previously reused 'multiple generic tokens')
tape('generic expansion - generic precedes specific', (t) => {
  t.equal(analyzer('AVE X'), 'Ave X');
  t.equal(analyzer('AVE S'), 'Avenue S');
  t.end();
});

// --- Expanding the 'directional' part of the street name ---

// expand directionals
// note: one issue with contracting directionals is getting
// something like 'East Coast Road' to not change.
tape('expand directionals - first token position', (t) => {
  t.equal(analyzer('N Main Street'), 'North Main Street');
  t.equal(analyzer('S Main Street'), 'South Main Street');
  t.equal(analyzer('E Main Street'), 'East Main Street');
  t.equal(analyzer('W Main Street'), 'West Main Street');
  t.end();
});

tape('expand directionals - last token position', (t) => {
  t.equal(analyzer('Main Street N'), 'Main Street North');
  t.equal(analyzer('Main Street S'), 'Main Street South');
  t.equal(analyzer('Main Street E'), 'Main Street East');
  t.equal(analyzer('Main Street W'), 'Main Street West');
  t.end();
});

// do not expand diagonal (NE/SE/NW/SW) directionals
// (test names made unique: they previously reused the 'expand directionals'
// names above, which made failures impossible to attribute)
tape('do not expand diagonals - first token position', (t) => {
  t.equal(analyzer('NE Main Street'), 'NE Main Street');
  t.equal(analyzer('SE Main Street'), 'SE Main Street');
  t.equal(analyzer('NW Main Street'), 'NW Main Street');
  t.equal(analyzer('SW Main Street'), 'SW Main Street');
  t.end();
});

tape('do not expand diagonals - last token position', (t) => {
  t.equal(analyzer('Main Street NE'), 'Main Street NE');
  t.equal(analyzer('Main Street SE'), 'Main Street SE');
  t.equal(analyzer('Main Street NW'), 'Main Street NW');
  t.equal(analyzer('Main Street SW'), 'Main Street SW');
  t.end();
});

// do not expand directionals unless 3 or more tokens present
tape('expand directionals - only when 3 or more tokens', (t) => {
  t.equal(analyzer('N Street'), 'N Street');
  t.equal(analyzer('S Street'), 'S Street');
  t.equal(analyzer('E Street'), 'E Street');
  t.equal(analyzer('W Street'), 'W Street');
  t.end();
});

// do not expand directionals when followed by a 'generic'
tape('expand directionals - unless followed by a generic', (t) => {
  t.equal(analyzer('N St Station'), 'N St Station');
  t.equal(analyzer('N Street Station'), 'N Street Station');
  t.equal(analyzer('N Ave Junction'), 'N Ave Junction');
  t.equal(analyzer('N Avenue Junction'), 'N Avenue Junction');
  t.end();
});

// contract english diagonals (southwest,southeast...)
tape('contract english diagonals - first token position', (t) => {
  t.equal(analyzer('Northeast Main Street'), 'NE Main Street');
  t.equal(analyzer('Southeast Main Street'), 'SE Main Street');
  t.equal(analyzer('Northwest Main Street'), 'NW Main Street');
  t.equal(analyzer('Southwest Main Street'), 'SW Main Street');
  t.end();
});

tape('contract english diagonals - last token position', (t) => {
  t.equal(analyzer('Main Street Northeast'), 'Main Street NE');
  t.equal(analyzer('Main Street Southeast'), 'Main Street SE');
  t.equal(analyzer('Main Street Northwest'), 'Main Street NW');
  t.equal(analyzer('Main Street Southwest'), 'Main Street SW');
  t.end();
});

// add missing English street name ordinals
tape('add missing English street name ordinals', (t) => {
  t.equal(analyzer('W 26 St'), 'West 26th Street');
  t.equal(analyzer('W 26th St'), 'West 26th Street');
  t.equal(analyzer('1 St'), '1st Street');
  t.equal(analyzer('2 Rd'), '2nd Road');
  t.equal(analyzer('3 Ave'), '3rd Avenue');
  t.equal(analyzer('4 Ln'), '4th Lane');
  t.equal(analyzer('11 St'), '11th Street');
  t.equal(analyzer('12 Rd'), '12th Road');
  t.equal(analyzer('13 Ave'), '13th Avenue');
  t.equal(analyzer('14 Ln'), '14th Lane');
  t.equal(analyzer('101 St'), '101st Street');
  t.equal(analyzer('102 Rd'), '102nd Road');
  t.equal(analyzer('103 Ave'), '103rd Avenue');
  t.equal(analyzer('104 Ln'), '104th Lane');
  t.equal(analyzer('no 1 st'), 'No 1 Street');
  t.equal(analyzer('no #1 st'), 'No #1 Street');
  t.end();
});

// --- NOOP inputs which should never change ---

// no-ops, these inputs should not change regardless of the algorithm used
tape('no-ops', (t) => {
  // street names composed entirely of 'generic' tokens
  t.equal(analyzer('Esplanade'), 'Esplanade');
  t.equal(analyzer('Park Road'), 'Park Road');

  // do not contract directionals which are part of the name
  t.equal(analyzer('East Coast Road'), 'East Coast Road');

  // number prefix
  t.equal(analyzer('No 1 Road'), 'No 1 Road');

  // spanish prefix 'la' should never be expanded to 'lane'
  t.equal(analyzer('La Bamba Road'), 'La Bamba Road');

  // directional as street name
  t.equal(analyzer('N Street'), 'N Street');
  t.equal(analyzer('No Street'), 'No Street');
  t.equal(analyzer('North Street'), 'North Street');
  t.equal(analyzer('Northe Street'), 'Northe Street');

  // do not anglicise/de-anglicise names
  t.equal(analyzer('Centre Road'), 'Centre Road');
  t.equal(analyzer('Center Road'), 'Center Road');
  t.equal(analyzer('Annex Road'), 'Annex Road');
  t.equal(analyzer('Anex Road'), 'Anex Road');

  // personal title in middle of name
  t.equal(analyzer('Main Road St Arnaud'), 'Main Road St Arnaud');
  t.equal(analyzer('Mount St John Avenue'), 'Mount St John Avenue');
  t.end();
});

tape('misc', (t) => {
  t.equal(analyzer('YELLOWSTONE BLVD'), 'Yellowstone Boulevard');
  t.equal(analyzer('YESHIVA LN'), 'Yeshiva Lane');
  t.equal(analyzer('WYGANT PL'), 'Wygant Place');
  t.equal(analyzer('W 262 ST'), 'West 262nd Street');
  t.equal(analyzer('W 26TH ST'), 'West 26th Street');
  t.equal(analyzer('WILLIE MC DONALD WAY'), 'Willie Mc Donald Way');
  t.equal(analyzer('West 93rd Street'), 'West 93rd Street');
  t.equal(analyzer('JFK AIRPORT'), 'Jfk Airport'); // this should really uppercase JFK
  t.equal(analyzer('DR M L KING JR BLVD'), 'Dr M L King Jr Boulevard'); // not perfect
  t.equal(analyzer('E HAMPTON BLVD'), 'East Hampton Boulevard');
  t.equal(analyzer('MARATHON PKWY'), 'Marathon Parkway');
  t.equal(analyzer('ANDREWS AVE S'), 'Andrews Avenue South');
  t.equal(analyzer('W 13 ST'), 'West 13th Street');
  t.end();
});

tape('misc directionals', (t) => {
  t.equal(analyzer('W KINGSBRIDGE RD'), 'West Kingsbridge Road');
  t.equal(analyzer('W MOSHOLU PKWY S'), 'West Mosholu Parkway South');
  t.equal(analyzer('WILLIAMSBURG ST E'), 'Williamsburg Street East');
  t.equal(analyzer('W MOSHOLU PKWY N'), 'West Mosholu Parkway North');
  t.equal(analyzer('W MOSHOLU PKWY SE'), 'West Mosholu Parkway SE');
  t.equal(analyzer('S WILLIAM ST'), 'South William Street');
  t.equal(analyzer('Foo ST South East'), 'Foo Street South East');
  t.end();
});

// Disabled test, kept verbatim for reference.
// NOTE(review): presumably waiting on prefix expansion (St -> Saint,
// Mt -> Mount, Ft -> Fort) being supported by the analyzer; the 'no-ops'
// test above currently expects 'St' in mid-name position to stay as-is.
// Confirm the intended behaviour before enabling.
// tape('prefix expansions', (t) => {
//   t.equal(analyzer('ST JAMES ST'), 'Saint James Street');
//   t.equal(analyzer('ST JAMES AVE'), 'Saint James Avenue');
//   t.equal(analyzer('ST. JAMES AVE'), 'Saint James Avenue');
//   t.equal(analyzer('ST NICHOLAS TER'), 'Saint Nicholas Terrace');
//   t.equal(analyzer('MT DOOM CRES'), 'Mount Doom Crescent');
//   t.equal(analyzer('MT. DOOM CRES'), 'Mount Doom Crescent');
//   t.equal(analyzer('FT IMPENETRABLE ROW'), 'Fort Impenetrable Row');
//   t.equal(analyzer('FT. IMPENETRABLE ROW'), 'Fort Impenetrable Row');
//   t.equal(analyzer('St Leonards Drive'), 'Saint Leonards Drive');
//   t.equal(analyzer('St Andrew Street'), 'Saint Andrew Street');
//   t.end();
// });