@palasimi/ipa-cluster
Version:
Cluster words with similar IPA transcriptions together
92 lines • 3.2 kB
JavaScript
;
// SPDX-License-Identifier: GPL-3.0-or-later
// Copyright (c) 2023 Levi Gruspe
// Context-matching.
Object.defineProperty(exports, "__esModule", { value: true });
exports.ContextMatcher = void 0;
const trie_1 = require("./trie");
/**
 * Decides whether two aligned segments appear in a stored context.
 *
 * Four tries are kept: one per combination of side (left/right) and
 * direction (before/after the segment of interest).
 */
class ContextMatcher {
    constructor() {
        // Left-hand side: what comes before / after the segment.
        this.leftBeforeTrie = new trie_1.Trie();
        this.leftAfterTrie = new trie_1.Trie();
        // Right-hand side: what comes before / after the segment.
        this.rightBeforeTrie = new trie_1.Trie();
        this.rightAfterTrie = new trie_1.Trie();
    }
    /**
     * Registers a context in all four tries.
     *
     * Contexts whose `left` and `right` are identical, or where either one
     * is the boundary marker "#", are ignored. `split()` is expected to have
     * filtered those out already; the check is repeated here as a safeguard.
     *
     * @param context - Object with `left`, `right`, `constraint` (itself
     * carrying `left` and `right`), `leftBeforeContext`, `leftAfterContext`,
     * `rightBeforeContext` and `rightAfterContext`
     */
    add(context) {
        const isTrivial = context.left === context.right ||
            context.left === "#" ||
            context.right === "#";
        if (isTrivial) {
            return;
        }
        const { left: leftConstraint, right: rightConstraint } = context.constraint;
        this.leftBeforeTrie.add(context.leftBeforeContext, leftConstraint);
        this.leftAfterTrie.add(context.leftAfterContext, leftConstraint);
        this.rightBeforeTrie.add(context.rightBeforeContext, rightConstraint);
        this.rightAfterTrie.add(context.rightAfterContext, rightConstraint);
    }
    /**
     * Tests if `s[i]` and `t[j]` match any of the stored contexts.
     *
     * A missing element (`s[i]` or `t[j]` is falsy) is treated as "_".
     * Equal symbols always match. Otherwise the sequence holding the smaller
     * symbol (by `<` comparison) plays the "left" role and the other one the
     * "right" role, and all four tries must accept their respective
     * surroundings.
     *
     * @param s - An array of IPA segments
     * @param t - An array of IPA segments
     * @param i - Index to an element in `s`
     * @param j - Index to an element in `t`
     * @param l1 - Language of `s`
     * @param l2 - Language of `t`
     * @returns `true` for equal symbols, otherwise the `&&`-combined result
     * of the four trie tests
     */
    test(s, t, i, j, l1, l2) {
        const a = s[i] || "_";
        const b = t[j] || "_";
        // Identical symbols need no context check.
        if (a === b) {
            return true;
        }
        const sSide = { segments: s, index: i, language: l1 };
        const tSide = { segments: t, index: j, language: l2 };
        // Put the side with the smaller symbol on the left.
        const [lhs, rhs] = b < a ? [tSide, sSide] : [sSide, tSide];
        // "After" contexts are checked first, then "before" contexts;
        // evaluation stops at the first failing check.
        return (this.leftAfterTrie.test(extractAfter(lhs.segments, lhs.index), lhs.language) &&
            this.rightAfterTrie.test(extractAfter(rhs.segments, rhs.index), rhs.language) &&
            this.leftBeforeTrie.test(extractBefore(lhs.segments, lhs.index), lhs.language) &&
            this.rightBeforeTrie.test(extractBefore(rhs.segments, rhs.index), rhs.language));
    }
}
exports.ContextMatcher = ContextMatcher;
/**
 * Collects the elements of `s` that precede a position, nearest first.
 *
 * For `i >= 0` these are the elements at indices `0 .. i - 1`. A negative
 * `i` signals a deleted element near that position; in that case the first
 * `-i` elements are taken.
 *
 * The elements are returned in reverse order (closest to the position first,
 * furthest last), followed by "#" as the word-boundary marker. `s` itself is
 * left untouched.
 *
 * @param s - An array of segments
 * @param i - Position in `s`; negative to mark a deletion
 * @returns A new array: reversed prefix plus a trailing "#"
 */
function extractBefore(s, i) {
    const prefixLength = i >= 0 ? i : -i;
    const nearestFirst = s.slice(0, prefixLength).reverse();
    nearestFirst.push("#");
    return nearestFirst;
}
/**
 * Collects the elements of `s` that follow a position.
 *
 * For `i >= 0` these are the elements from index `i + 1` onwards. A negative
 * `i` signals a deleted element near that position; in that case the
 * elements from index `-i` onwards are taken.
 *
 * "#" is appended to mark the end of the sequence. `s` itself is left
 * untouched.
 *
 * @param s - An array of segments
 * @param i - Position in `s`; negative to mark a deletion
 * @returns A new array: the suffix plus a trailing "#"
 */
function extractAfter(s, i) {
    const start = i >= 0 ? i + 1 : -i;
    const following = s.slice(start);
    following.push("#");
    return following;
}
//# sourceMappingURL=context.js.map