UNPKG

@palasimi/ipa-cluster

Version:

Cluster words with similar IPA transcriptions together

92 lines 3.2 kB
"use strict"; // SPDX-License-Identifier: GPL-3.0-or-later // Copyright (c) 2023 Levi Gruspe // Context-matching. Object.defineProperty(exports, "__esModule", { value: true }); exports.ContextMatcher = void 0; const trie_1 = require("./trie"); /** * Context matcher. */ class ContextMatcher { constructor() { this.leftBeforeTrie = new trie_1.Trie(); this.leftAfterTrie = new trie_1.Trie(); this.rightBeforeTrie = new trie_1.Trie(); this.rightAfterTrie = new trie_1.Trie(); } /** * Adds a context to the `ContextMatcher`. */ add(context) { // There are cases where a context/split rule does not need to be added. // See `split()`. // We'll check again here just to be sure. const { left, right } = context; if (left === right || left === "#" || right === "#") { return; } const { constraint, leftBeforeContext, leftAfterContext, rightBeforeContext, rightAfterContext, } = context; this.leftBeforeTrie.add(leftBeforeContext, constraint.left); this.leftAfterTrie.add(leftAfterContext, constraint.left); this.rightBeforeTrie.add(rightBeforeContext, constraint.right); this.rightAfterTrie.add(rightAfterContext, constraint.right); } /** * Tests if `s[i]` and `t[j]` match any of the stored contexts. * * @param s - An array of IPA segments * @param t - An array of IPA segments * @param i - Index to an element in `s` * @param j - Index to an element in `t` * @param l1 - Language of `s` * @param l2 - Language of `t` */ test(s, t, i, j, l1, l2) { let a = s[i] || "_"; let b = t[j] || "_"; // There's no need to match the context if the symbols are the same. if (a === b) { return true; } if (b < a) { [s, t] = [t, s]; [i, j] = [j, i]; [a, b] = [b, a]; [l1, l2] = [l2, l1]; } return (this.leftAfterTrie.test(extractAfter(s, i), l1) && this.rightAfterTrie.test(extractAfter(t, j), l2) && this.leftBeforeTrie.test(extractBefore(s, i), l1) && this.rightBeforeTrie.test(extractBefore(t, j), l2)); } } exports.ContextMatcher = ContextMatcher; /** * Extracts the strings before the given position. 
/*
 * Collects the elements of `s` that come before position `i`.
 *
 * Negative indices are used to indicate that there's a deleted element near
 * position `i`; in that case the prefix ends at index `-i`.
 *
 * The result is a new array ordered nearest-first: its first element is the
 * one closest to `i` and its last real element is the furthest. A trailing
 * "#" (word boundary) is always appended.
 */
function extractBefore(s, i) {
    const end = i >= 0 ? i : -i;
    const nearestFirst = s.slice(0, end).reverse();
    nearestFirst.push("#");
    return nearestFirst;
}
/**
 * Collects the elements of `s` that come after position `i`.
 *
 * Negative indices are used to indicate that there's a deleted element near
 * position `i`; in that case the suffix starts at index `-i`.
 *
 * The result is a new array that always ends with "#", marking the end of the
 * sequence.
 */
function extractAfter(s, i) {
    const start = i >= 0 ? i + 1 : -i;
    const following = s.slice(start);
    following.push("#");
    return following;
}
//# sourceMappingURL=context.js.map