@palasimi/ipa-cluster
Version:
Cluster words with similar IPA transcriptions together
27 lines • 1.05 kB
JavaScript
;
// SPDX-License-Identifier: GPL-3.0-or-later
// Copyright (c) 2023 Levi Gruspe
// Flag this CommonJS module with an `__esModule` property set to true.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the `cluster` export up front as undefined; the real function is
// assigned to it after its definition further down.
exports.cluster = void 0;
const dbscan_1 = require("./dbscan");
const utils_1 = require("./utils");
/**
 * Cluster words with similar IPA transcriptions together.
 *
 * @param dataset - Array of records. Each record needs an `ipa` string, which
 *   is split on single spaces into tokens, and a `language` value that is
 *   carried along unchanged.
 * @param metric - Forwarded untouched to `precompute` together with the
 *   tokenized records (presumably a distance function over tokenized records;
 *   confirm against `./utils`).
 * @param options - Optional settings. `options.epsilon`, when neither `null`
 *   nor `undefined`, is handed to `dbscan` as-is; otherwise the value comes
 *   from `suggestEpsilon`.
 * @returns An array of clusters. Each cluster is an array holding the original
 *   `dataset` entries (not the tokenized copies) selected by the indices that
 *   `dbscan` returned.
 */
function cluster(dataset, metric, options = {}) {
    // Break every transcription into its space-separated tokens, keeping the
    // language next to them.
    const tokenized = dataset.map((entry) => ({
        ipa: entry.ipa.split(" "),
        language: entry.language,
    }));
    // Helpers are pulled off their modules first so they are invoked without
    // a `this` binding.
    const precompute = utils_1.precompute;
    const distance = precompute(tokenized, metric);
    // The clustering helpers receive positions into the dataset, not records.
    const positions = tokenized.map((_token, position) => position);
    // Only fall back to a suggested epsilon when none was supplied; 0 is kept.
    let epsilon = options.epsilon;
    if (epsilon == null) {
        const suggestEpsilon = dbscan_1.suggestEpsilon;
        epsilon = suggestEpsilon(positions, distance);
    }
    const dbscan = dbscan_1.dbscan;
    const groups = dbscan(positions, epsilon, distance);
    // Translate each group of positions back into the caller's own records.
    const toRecord = (position) => dataset[position];
    return groups.map((members) => members.map(toRecord));
}
// Publish the function on the module's exports, replacing the earlier
// undefined placeholder.
exports.cluster = cluster;
//# sourceMappingURL=cluster.js.map