UNPKG

zips

Version:

Light, fast, tree-based way to get cities by zipcode and location.

219 lines (181 loc) 5.52 kB
import { readline } from "https://deno.land/x/readline@v1.1.0/mod.ts";
import ProgressBar from "https://deno.land/x/progress@v1.3.6/mod.ts";
import KDTree from "./src/kd-tree.mjs";
import KDArrayTree from "./src/kd-array-tree.mjs";

// A single decoder is reused for every input line; decode() without the
// `stream` option keeps no state between calls, so sharing it is safe.
const lineDecoder = new TextDecoder();

/**
 * Parse one tab-separated record into a place object.
 *
 * Columns 0, 1, 2, 4, 9 and 10 are read as country, zip, city, state,
 * latitude and longitude (presumably the GeoNames postal-code dump layout —
 * confirm against the data file's readme).
 *
 * @param {Uint8Array} line - Raw bytes of one line, as yielded by `readline`.
 * @returns {{country: string, zip: string, city: string, state: string,
 *   lat: number, long: number}} Parsed record; `lat`/`long` are NaN when the
 *   column is missing or not numeric.
 */
function parseLine(line) {
  const parts = lineDecoder
    .decode(line)
    .split("\t")
    .map((str) => str.trim());
  return {
    country: parts[0],
    zip: parts[1],
    city: parts[2],
    state: parts[4],
    lat: parseFloat(parts[9]),
    long: parseFloat(parts[10]),
  };
}

console.log("loading...");
// NOTE(review): `f` is never closed explicitly; confirm whether `readline`
// closes the reader at EOF before adding an `f.close()` here.
const f = await Deno.open("./US.txt");
const places = [];
for await (const line of readline(f)) {
  places.push(parseLine(line));
}

// Sizes recorded for the different insertion orders that were tried
// (presumably the exported tree's length — confirm); `<` marks one entry:
// (perfect)    3837431
// best shuffle 3964908 (103.322%)
// tree order   3971468 (103.493%) <
// median dist  3990969 (104.001%)
// median sort  4018906 (104.729%)
// lat median   4031597 (105.060%)
// long median  4032787 (105.091%)
// unsorted     4032842 (105.092%)

// bestShuffle leaves `places` in the order of its last shuffle, and
// treeOrder then starts from that order.
const shuffled = bestShuffle(places);
console.log("shuffled", shuffled.length);

const treeOrdered = treeOrder(places);
console.log("treeOrdered", treeOrdered.length);

const balanced = balanceTree(places);
console.log("balanced", balanced.length);

// const balancedArray = balanceArrayTree(places) ?? "";
// console.log("balanced array", balancedArray.length);

// Only the two deterministic strategies compete; `shuffled` is reported above
// but not considered here.
const best = balanced.length < treeOrdered.length ? balanced : treeOrdered;

console.log(JSON.stringify(places).length, best.length);
await Deno.writeTextFile("loc-kd-tree.json", best);

// Round-trip check: time how long it takes to load the file just written.
const loadStart = Date.now();
const loadTree = new KDTree("lat", "long");
loadTree.import(await Deno.readTextFile("loc-kd-tree.json"));
console.log(`load (${Date.now() - loadStart}ms)`);

// Average search time over random coordinates.
const searchStart = Date.now();
const testCount = 1000;
for (let i = 1; i <= testCount; i++) {
  const lat = 35 + Math.random() * 5; // 35 to 40
  const long = -120 + Math.random() * 40; // -120 to -80
  const t = loadTree.search(lat, long);
  if (t === null) {
    console.error(lat, long);
  }
}
console.log(
  `search (~${((Date.now() - searchStart) / testCount).toFixed(3)}ms)`,
);

/*
function medianUnmerge(arr, key) {
  function muImpl(arr) {
    if (arr.length < 3) {
      return arr;
    }
    const mid = Math.floor(arr.length / 2);
    return [
      arr[mid],
      ...muImpl(arr.slice(0, mid)),
      ...muImpl(arr.slice(mid + 1))
    ];
  }
  return muImpl(arr.sort((a, b) => a[key] - b[key]));
}

function medianDistSort(arr, key) {
  arr = arr.sort((a, b) => a[key] - b[key]);
  const med = arr[Math.floor(arr.length / 2)];
  return arr.sort((a, b) => {
    return Math.abs(a[key] - med[key]) - Math.abs(b[key] - med[key]);
  });
}

const latSorted = medianDistSort(places, "lat");
const longSorted = medianDistSort(places, "long");

let latIndex = 0;
let longIndex = 0;
let insertedZips = new Set();
while (
  latIndex < latSorted.length ||
  longIndex < longSorted.length
) {
  while (
    latIndex < latSorted.length &&
    insertedZips.has(latSorted[latIndex].zip)
  ) {
    latIndex++;
  }
  if (latIndex < latSorted.length) {
    kdt.insert(latSorted[latIndex]);
    insertedZips.add(latSorted[latIndex].zip);
  }

  while (
    longIndex < longSorted.length &&
    insertedZips.has(longSorted[longIndex].zip)
  ) {
    longIndex++;
  }
  if (longIndex < longSorted.length) {
    kdt.insert(longSorted[longIndex]);
    insertedZips.add(longSorted[longIndex].zip);
  }
}
*/

/**
 * Build a tree from `places`, read it back with `toArray()`, then rebuild a
 * fresh tree by inserting the items in that order and export it.
 *
 * @param {object[]} places - Parsed place records, inserted in the given order.
 * @returns {*} Whatever `KDTree#export()` returns (written to disk as text by
 *   the caller, so presumably a string — confirm).
 */
function treeOrder(places) {
  const kdt = new KDTree("lat", "long");
  places.forEach((p) => kdt.insert(p));
  const order = kdt.toArray();
  kdt.clear();
  order.forEach((p) => kdt.insert(p));
  return kdt.export();
}

/**
 * Uniformly shuffle an array in place (Fisher–Yates).
 *
 * Replaces `sort(() => Math.random() - 0.5)`: a random comparator is
 * inconsistent, which makes the resulting permutation biased and
 * engine-dependent, and it costs O(n log n) instead of O(n).
 *
 * @param {Array} items - Array to shuffle; mutated.
 * @returns {Array} The same array instance.
 */
function shuffleInPlace(items) {
  for (let i = items.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [items[i], items[j]] = [items[j], items[i]];
  }
  return items;
}

/**
 * Try `tries` random insertion orders and keep the shortest export.
 *
 * `places` is shuffled in place on every try, exactly as before, so the
 * caller's array is left in the order of the last shuffle.
 *
 * @param {object[]} places - Parsed place records; reordered in place.
 * @param {number} [tries=100] - Number of random orders to evaluate.
 * @returns {*} Shortest `KDTree#export()` result seen, or null when
 *   `tries` is less than 1.
 */
function bestShuffle(places, tries = 100) {
  const sTree = new KDTree("lat", "long");
  // NOTE(review): the title says "tree order" although this is the shuffle
  // search — possibly a copy-paste; kept unchanged.
  const progress = new ProgressBar({
    title: "tree order",
    total: tries,
  });

  let best = null;
  let iter = 0;
  while (iter++ < tries) {
    shuffleInPlace(places);
    places.forEach((p) => sTree.insert(p));
    const str = sTree.export();
    if (best === null || str.length < best.length) {
      best = str;
    }
    progress.render(iter);
    sTree.clear();
  }
  return best;
}

/**
 * Order points so each median is emitted before the points on either side.
 *
 * Each subset is sorted along the axis for its depth (cycling through
 * `axes`), its middle element is emitted, then the lower half and the upper
 * half are processed the same way one level deeper. Subsets of fewer than
 * three points are emitted in sorted order.
 *
 * @param {object[]} points - Records with numeric fields named in `axes`;
 *   not modified (a copy is sorted).
 * @param {string[]} axes - Field names to alternate between by depth.
 * @returns {object[]} New array holding the same records in insertion order.
 */
function medianSplitOrder(points, axes) {
  const ordered = [];

  function visit(subset, depth) {
    const axis = axes[depth % axes.length];
    subset.sort((a, b) => a[axis] - b[axis]);
    if (subset.length < 3) {
      ordered.push(...subset);
      return;
    }
    const mid = Math.floor(subset.length / 2);
    ordered.push(subset[mid]);
    // slice() copies, so deeper sorts never disturb this level's order.
    visit(subset.slice(0, mid), depth + 1);
    visit(subset.slice(mid + 1), depth + 1);
  }

  visit([...points], 0);
  return ordered;
}

/**
 * Build a KDTree by inserting places in median-split order and export it.
 *
 * @param {object[]} places - Parsed place records; left unmodified.
 * @returns {*} Whatever `KDTree#export()` returns.
 */
function balanceTree(places) {
  const axes = ["lat", "long"];
  const kdt = new KDTree(...axes);
  medianSplitOrder(places, axes).forEach((p) => kdt.insert(p));
  return kdt.export();
}

/**
 * Build a KDArrayTree in median-split order, log its item count and write
 * its pretty-printed form to "kda-tree.txt".
 *
 * @param {object[]} places - Parsed place records; left unmodified.
 * @returns {Promise<void>} Resolves once the file has been written; the
 *   export itself is currently not returned.
 */
async function balanceArrayTree(places) {
  const axes = ["lat", "long"];
  const kdt = new KDArrayTree(...axes);
  medianSplitOrder(places, axes).forEach((p) => kdt.insert(p));
  console.log(kdt.items.length);
  await Deno.writeTextFile("kda-tree.txt", kdt.prettyPrint());
  // return kdt.export();
}