@palasimi/ipa-cluster
Version:
Cluster words with similar IPA transcriptions together
60 lines • 2.7 kB
JavaScript
;
// SPDX-License-Identifier: GPL-3.0-or-later
// Copyright (c) 2023 Levi Gruspe
// String metric/distance functions.
// Flag the CommonJS `exports` object with `__esModule: true`.
// NOTE(review): presumably compiler-emitted ES-module interop boilerplate;
// confirm against the build setup before editing by hand.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the `levenshtein` export as `undefined`; the actual function is
// assigned to it near the end of this file.
exports.levenshtein = void 0;
// Unit-cost function used by the Levenshtein distance when the caller does not
// supply one.
// `s` and `t` are the two sequences; `i` and `j` index into them. An index
// that does not resolve to an item (e.g. a negative one) marks that side as
// "missing", i.e. an insertion/deletion rather than a substitution.
// Returns 0 when both items are strictly equal, 1 otherwise.
function defaultCost(s, t, i, j) {
  const left = s[i];
  const right = t[j];

  // Exactly one side is missing: this is an insertion or a deletion.
  const leftMissing = left == null;
  const rightMissing = right == null;
  if (leftMissing !== rightMissing) {
    return 1;
  }

  // Otherwise it is a substitution, which is free only for identical items.
  if (left === right) {
    return 0;
  }
  return 1;
}
// Weighted Levenshtein distance between the sequences `s` and `t`.
//
// `cost(s, t, i, j, options)` prices a single edit step:
//   - both indices non-negative: substituting `s[i]` with `t[j]`;
//   - negative `j`: a step that consumes only `s[i]`;
//   - negative `i`: a step that consumes only `t[j]`.
// `options` is never inspected here; it is forwarded untouched to every `cost`
// call so callers can hand extra data to their cost function.
// Returns the minimal accumulated cost of turning one sequence into the other.
function levenshtein(s, t, cost = defaultCost, options = {}) {
  const rowCount = s.length + 1;
  const colCount = t.length + 1;

  // table[r][c] is the distance between the first `r` items of `s` and the
  // first `c` items of `t`, so the table is one larger than each sequence.
  const table = Array.from({ length: rowCount }, () => new Array(colCount).fill(0));

  // First column: `t` is empty, so each prefix of `s` is reached only by
  // steps that consume one more item of `s`.
  for (let row = 1; row < rowCount; row++) {
    table[row][0] = table[row - 1][0] + cost(s, t, row - 1, -1, options);
  }

  // First row: `s` is empty, so each prefix of `t` is reached only by steps
  // that consume one more item of `t`.
  for (let col = 1; col < colCount; col++) {
    table[0][col] = table[0][col - 1] + cost(s, t, -1, col - 1, options);
  }

  for (let i = 0; i < s.length; i++) {
    const previous = table[i];
    const current = table[i + 1];
    for (let j = 0; j < t.length; j++) {
      if (s[i] !== t[j]) {
        // Three candidate predecessors: the diagonal (substitution), the cell
        // above (consume `s[i]` only) and the cell to the left (consume `t[j]`
        // only). The negative index encodes which side is absent.
        const viaSubstitution = previous[j] + cost(s, t, i, j, options);
        const viaItemOfS = previous[j + 1] + cost(s, t, i, -j - 1, options);
        const viaItemOfT = current[j] + cost(s, t, -i - 1, j, options);
        current[j + 1] = Math.min(viaSubstitution, viaItemOfS, viaItemOfT);
      } else {
        // Identical items extend the diagonal for free; `cost` is not called.
        current[j + 1] = previous[j];
      }
    }
  }

  // The bottom-right cell covers both sequences in full.
  return table[s.length][t.length];
}
// Publish `levenshtein` on the CommonJS `exports` object as this module's
// public export.
exports.levenshtein = levenshtein;
//# sourceMappingURL=metrics.js.map