@palasimi/ipa-cluster
Version:
Cluster words with similar IPA transcriptions together
52 lines • 1.95 kB
JavaScript
;
// SPDX-License-Identifier: GPL-3.0-or-later
// Copyright (c) 2023 Levi Gruspe
// Tag the CommonJS exports object with `__esModule: true`.
// NOTE(review): presumably emitted by a TypeScript/ES-module-to-CommonJS
// compiler (the file ends with a sourceMappingURL) — confirm before hand-editing.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare both named exports up front as `undefined`; the functions
// themselves are assigned right after each definition further down.
exports.suggestEpsilon = exports.dbscan = void 0;
// Implements a simplified DBSCAN (clustering algorithm).
// `mnemonist` provides the `StaticDisjointSet` and `VPTree` structures that
// the two functions in this file rely on.
const mnemonist_1 = require("mnemonist");
// Groups the items of `dataset` with a reduced DBSCAN in which minPoints is
// fixed at 2: any two items whose distance is at most `epsilon` end up in the
// same group, either directly or through a chain of such pairs.
//
//   dataset  - indexable collection with a `length`.
//   epsilon  - largest distance (inclusive) at which two items are linked.
//   distance - callback invoked as distance(earlierItem, laterItem).
//
// Returns whatever `StaticDisjointSet#compile()` yields; per the original
// note this is an array of clusters, each an array of indices into `dataset`.
function dbscan(dataset, epsilon, distance) {
    const count = dataset.length;
    // With minPoints = 2 the clusters behave like connected components of the
    // "within epsilon" graph, so a disjoint-set structure is enough.
    const components = new mnemonist_1.StaticDisjointSet(count);
    for (let later = 1; later < count; later += 1) {
        const current = dataset[later];
        // Compare against every earlier item only, so each unordered pair is
        // measured exactly once.
        for (let earlier = 0; earlier < later; earlier += 1) {
            const isNeighbor = distance(dataset[earlier], current) <= epsilon;
            if (!isNeighbor) {
                continue;
            }
            components.union(later, earlier);
        }
    }
    return components.compile();
}
// Publish `dbscan` as a named export of this CommonJS module.
exports.dbscan = dbscan;
// Suggests an epsilon value to use with dbscan according to the "elbow"
// method.
function suggestEpsilon(dataset, distance) {
// A `VPTree` can be constructed in O(nlogn).
const tree = mnemonist_1.VPTree.from(dataset, distance);
// Compute distances of 2nd nearest neighbors.
// This takes around O(nlogn) (O(logn) for each data point).
const distances = [];
for (const data of dataset) {
const neighbors = tree.nearestNeighbors(2, data);
distances.push(neighbors[neighbors.length - 1].distance);
}
distances.sort((a, b) => a - b);
// Find the elbow/point with sharpest slope.
let maxSlope = 0;
let index = 0;
for (let i = 1; i < distances.length; i++) {
const slope = distances[i] - distances[i - 1];
if (slope > maxSlope) {
maxSlope = slope;
index = i;
}
}
// Suggest the elbow.
return distances[index] || 0;
}
// Publish `suggestEpsilon` as a named export of this CommonJS module.
exports.suggestEpsilon = suggestEpsilon;
//# sourceMappingURL=dbscan.js.map