zips
Version:
Light, fast, tree-based way to get cities by zipcode and location.
219 lines (181 loc) • 5.52 kB
JavaScript
import { readline } from "https://deno.land/x/readline@v1.1.0/mod.ts";
import ProgressBar from "https://deno.land/x/progress@v1.3.6/mod.ts";
import KDTree from "./src/kd-tree.mjs";
import KDArrayTree from "./src/kd-array-tree.mjs";
/**
 * Decodes one tab-separated record and keeps the columns this script uses.
 *
 * Every column is whitespace-trimmed before it is read. The column layout
 * (0 = country, 1 = zip, 2 = city, 4 = state, 9 = latitude, 10 = longitude)
 * presumably follows the postal-code dump stored in US.txt — confirm against
 * the data file.
 *
 * @param {BufferSource} line - Raw bytes of a single input line.
 * @returns {{country: string, zip: string, city: string, state: string, lat: number, long: number}}
 *   The selected columns; `lat`/`long` are NaN when the column is missing or
 *   not numeric, and the string fields are undefined when the line is short.
 */
function parseLine(line) {
  const text = new TextDecoder().decode(line);

  const columns = [];
  for (const cell of text.split("\t")) {
    columns.push(cell.trim());
  }

  // Keys are assigned in the same order the serialized records rely on.
  const record = {};
  record.country = columns[0];
  record.zip = columns[1];
  record.city = columns[2];
  record.state = columns[4];
  record.lat = parseFloat(columns[9]);
  record.long = parseFloat(columns[10]);
  return record;
}
// ---------------------------------------------------------------------------
// Build step: read every record of ./US.txt, try several insertion orders for
// the KD-tree, write the smaller of two exports to loc-kd-tree.json, then
// reload that file and time random nearest-location lookups.
// ---------------------------------------------------------------------------
console.log("loading...");
const f = await Deno.open("./US.txt");
const places = [];
try {
  for await (const line of readline(f)) {
    places.push(parseLine(line));
  }
} finally {
  // Deno.open leaves closing to the caller. Tolerate a handle that has
  // already been closed so the cleanup can never mask the real outcome.
  try {
    f.close();
  } catch (err) {
    if (!(err instanceof Deno.errors.BadResource)) {
      throw err;
    }
  }
}

// Sizes recorded for each insertion strategy (presumably the exported string
// length; the percentage is relative to the "perfect" size):
// (perfect) 3837431
// best shuffle 3964908 (103.322%)
// tree order 3971468 (103.493%) <
// median dist 3990969 (104.001%)
// median sort 4018906 (104.729%)
// lat median 4031597 (105.060%)
// long median 4032787 (105.091%)
// unsorted 4032842 (105.092%)

// NOTE: the order of these three calls matters. bestShuffle() reorders
// `places` in place, so treeOrder() starts from the last shuffled order, and
// balanceTree() then re-sorts `places` in place.
const shuffled = bestShuffle(places);
console.log("shuffled", shuffled.length);
const treeOrdered = treeOrder(places);
console.log("treeOrdered", treeOrdered.length);
const balanced = balanceTree(places);
console.log("balanced", balanced.length);
// const balancedArray = balanceArrayTree(places) ?? "";
// console.log("balanced array", balancedArray.length);

// Only the balanced and tree-ordered exports compete for the output file;
// the shuffled export is reported above but not written.
const best = balanced.length < treeOrdered.length ? balanced : treeOrdered;
console.log(JSON.stringify(places).length, best.length);
await Deno.writeTextFile("loc-kd-tree.json", best);

// Time importing the file that was just written.
let start = Date.now();
const loadTree = new KDTree("lat", "long");
loadTree.import(await Deno.readTextFile("loc-kd-tree.json"));
console.log(`load (${Date.now() - start}ms)`);

// Time `testCount` lookups at random coordinates and report the average.
start = Date.now();
const testCount = 1000;
for (let i = 1; i <= testCount; i++) {
  const lat = 35 + Math.random() * 5; // 35 to 40
  const long = -120 + Math.random() * 40; // -120 to -80
  const t = loadTree.search(lat, long);
  if (t === null) {
    // A lookup that found nothing: print the coordinates for inspection.
    console.error(lat, long);
  }
}
console.log(`search (~${((Date.now() - start) / testCount).toFixed(3)}ms)`);
/*
function medianUnmerge(arr, key) {
function muImpl(arr) {
if (arr.length < 3) {
return arr;
}
const mid = Math.floor(arr.length / 2);
return [
arr[mid],
...muImpl(arr.slice(0, mid)),
...muImpl(arr.slice(mid + 1))
];
}
return muImpl(arr.sort((a, b) => a[key] - b[key]));
}
function medianDistSort(arr, key) {
arr = arr.sort((a, b) => a[key] - b[key]);
const med = arr[Math.floor(arr.length / 2)];
return arr.sort((a, b) => {
return Math.abs(a[key] - med[key]) - Math.abs(b[key] - med[key]);
});
}
const latSorted = medianDistSort(places, "lat");
const longSorted = medianDistSort(places, "long");
let latIndex = 0;
let longIndex = 0;
let insertedZips = new Set();
while (
latIndex < latSorted.length ||
longIndex < longSorted.length
) {
while (
latIndex < latSorted.length &&
insertedZips.has(latSorted[latIndex].zip)
) {
latIndex++;
}
if (latIndex < latSorted.length) {
kdt.insert(latSorted[latIndex]);
insertedZips.add(latSorted[latIndex].zip);
}
while (
longIndex < longSorted.length &&
insertedZips.has(longSorted[longIndex].zip)
) {
longIndex++;
}
if (longIndex < longSorted.length) {
kdt.insert(longSorted[longIndex]);
insertedZips.add(longSorted[longIndex].zip);
}
}
*/
/**
 * Builds a KD-tree from `places`, reads the tree back with `toArray()`, then
 * rebuilds the same tree from that ordering and serializes it.
 *
 * @param {object[]} places - Records carrying `lat` and `long` properties.
 * @returns {*} The value produced by `KDTree#export()` for the rebuilt tree
 *   (the calling script treats it as a string).
 */
function treeOrder(places) {
  const tree = new KDTree("lat", "long");

  // Insert a fixed-length snapshot of `items`, one record at a time.
  const insertAll = (items) => {
    for (let i = 0, count = items.length; i < count; i++) {
      tree.insert(items[i]);
    }
  };

  insertAll(places);
  const traversal = tree.toArray();

  // Start from an empty tree and feed it the order the first tree reported.
  tree.clear();
  insertAll(traversal);

  return tree.export();
}
// best shuffle
/**
 * Inserts `places` into a KD-tree in `tries` different random orders and
 * returns the shortest export seen.
 *
 * The shuffle is an in-place Fisher–Yates pass, so every ordering is equally
 * likely. Sorting with a random comparator is biased and hands the sort an
 * inconsistent comparator.
 *
 * Side effect: `places` is reordered in place and is left in the last
 * shuffled order when the function returns.
 *
 * @param {object[]} places - Records to insert; reordered in place.
 * @param {number} [tries=100] - Number of random orderings to evaluate.
 * @returns {string|null} The shortest `KDTree#export()` result, or null when
 *   `tries` is less than 1.
 */
function bestShuffle(places, tries = 100) {
  const sTree = new KDTree("lat", "long");
  const progress = new ProgressBar({
    title: "tree order",
    total: tries,
  });

  let best = null;
  for (let iter = 1; iter <= tries; iter++) {
    // Fisher–Yates: swap each slot with a uniformly chosen slot at or below it.
    for (let i = places.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [places[i], places[j]] = [places[j], places[i]];
    }

    for (const place of places) {
      sTree.insert(place);
    }

    const str = sTree.export();
    if (best === null || str.length < best.length) {
      best = str;
    }

    progress.render(iter);
    // Reuse the same tree instance for the next ordering.
    sTree.clear();
  }
  return best;
}
/**
 * Inserts `places` into a KD-tree median-first and returns the export.
 *
 * Each group is sorted on the axis for its depth ("lat" at even depths,
 * "long" at odd depths). The middle record is emitted first, followed by
 * everything from the lower half and then everything from the upper half,
 * each ordered the same way one level deeper. Groups of fewer than three
 * records are emitted in sorted order.
 *
 * Side effect: the top-level sort runs on `places` itself, so the caller's
 * array ends up sorted by "lat".
 *
 * @param {object[]} places - Records with numeric `lat` and `long`.
 * @returns {*} The value produced by `KDTree#export()`.
 */
function balanceTree(places) {
  const axes = ["lat", "long"];
  const insertionOrder = [];

  // Pre-order walk that appends records to `insertionOrder`, median first.
  function emitMedianFirst(group, level) {
    const key = axes[level % axes.length];
    group.sort((left, right) => left[key] - right[key]);

    if (group.length < 3) {
      insertionOrder.push(...group);
      return;
    }

    const pivot = Math.floor(group.length / 2);
    insertionOrder.push(group[pivot]);
    emitMedianFirst(group.slice(0, pivot), level + 1);
    emitMedianFirst(group.slice(pivot + 1), level + 1);
  }

  const tree = new KDTree(...axes);
  emitMedianFirst(places, 0);
  for (const place of insertionOrder) {
    tree.insert(place);
  }
  return tree.export();
}
/**
 * Inserts `places` median-first into a KDArrayTree, logs the length of the
 * tree's `items`, and writes the tree's `prettyPrint()` output to
 * "kda-tree.txt".
 *
 * The insertion order is: sort the group on the axis for its depth ("lat" at
 * even depths, "long" at odd depths), emit the middle record, then the lower
 * half, then the upper half, each ordered the same way one level deeper.
 * Groups of fewer than three records are emitted in sorted order.
 *
 * Side effect: the top-level sort runs on `places` itself, so the caller's
 * array ends up sorted by "lat".
 *
 * @param {object[]} places - Records with numeric `lat` and `long`.
 * @returns {Promise<void>} Resolves once the file has been written; no export
 *   is returned (that return is currently disabled).
 */
async function balanceArrayTree(places) {
  const axes = ["lat", "long"];
  const insertionOrder = [];

  // Pre-order walk that appends records to `insertionOrder`, median first.
  function emitMedianFirst(group, level) {
    const key = axes[level % axes.length];
    group.sort((left, right) => left[key] - right[key]);

    if (group.length < 3) {
      insertionOrder.push(...group);
      return;
    }

    const pivot = Math.floor(group.length / 2);
    insertionOrder.push(group[pivot]);
    emitMedianFirst(group.slice(0, pivot), level + 1);
    emitMedianFirst(group.slice(pivot + 1), level + 1);
  }

  const tree = new KDArrayTree(...axes);
  emitMedianFirst(places, 0);
  for (const place of insertionOrder) {
    tree.insert(place);
  }

  console.log(tree.items.length);
  const rendered = tree.prettyPrint();
  await Deno.writeTextFile("kda-tree.txt", rendered);
  // return tree.export();
}