UNPKG

@palasimi/ipa-cluster

Version:

Cluster words with similar IPA transcriptions together

27 lines 1.05 kB
"use strict"; // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2023 Levi Gruspe Object.defineProperty(exports, "__esModule", { value: true }); exports.cluster = void 0; const dbscan_1 = require("./dbscan"); const utils_1 = require("./utils"); // Cluster words with similar IPA transcriptions together. function cluster(dataset, metric, options = {}) { // Tokenize dataset. const tokenized = dataset.map((p) => { return { ipa: p.ipa.split(" "), language: p.language, }; }); const precomputedDistance = (0, utils_1.precompute)(tokenized, metric); const indices = tokenized.map((_, i) => i); const epsilon = options.epsilon != null ? options.epsilon : (0, dbscan_1.suggestEpsilon)(indices, precomputedDistance); const clusters = (0, dbscan_1.dbscan)(indices, epsilon, precomputedDistance); // Convert indices to values. return clusters.map((cluster) => cluster.map((i) => dataset[i])); } exports.cluster = cluster; //# sourceMappingURL=cluster.js.map