@dcoffey/espells
Version:
Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.
183 lines • 7 kB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { CapType, CompoundPos } from "../constants.js";
import { Word } from "../dic/index.js";
import { replchars } from "../permutations.js";
import { any, isTriplet, isUppercased } from "../util.js";
import { affixForms } from "./affixes.js";
import { AffixForm } from "./forms.js";
import { LKFlags } from "./lk-flags.js";
/**
 * Generates every acceptable {@link CompoundForm} for a word.
 *
 * Nothing is produced when any affix form of the word carries the
 * `FORBIDDENWORD` flag. Otherwise candidates are taken first from the
 * flag-based system (when `COMPOUNDBEGIN` or `COMPOUNDFLAG` is set) and
 * then from the `COMPOUNDRULE` system (when at least one rule exists).
 * Candidates rejected by {@link isBadCompound} are skipped.
 *
 * @param word - The word to decompose; its `aff` property is consulted.
 * @param allowNoSuggest - Forwarded to the flag-based decomposition only.
 */
export function* compoundForms(word, allowNoSuggest = true) {
    const { aff } = word;
    // A forbidden word is never decomposed.
    // TODO: this is incredibly slow, remove this
    const forbiddenFlag = aff.FORBIDDENWORD;
    if (forbiddenFlag) {
        for (const { flags } of affixForms(word, true, true)) {
            if (flags.has(forbiddenFlag)) {
                return;
            }
        }
    }
    // Marker-flag based compounds.
    const usesFlagSystem = aff.COMPOUNDBEGIN || aff.COMPOUNDFLAG;
    if (usesFlagSystem) {
        for (const candidate of compoundsByFlags(word, allowNoSuggest)) {
            if (isBadCompound(word, candidate)) {
                continue;
            }
            yield candidate;
        }
    }
    // Pattern-rule based compounds.
    if (aff.COMPOUNDRULE.size) {
        for (const candidate of compoundsByRules(word)) {
            if (isBadCompound(word, candidate)) {
                continue;
            }
            yield candidate;
        }
    }
}
/**
 * Splits a word into compound parts with the `COMPOUNDFLAG` /
 * `COMPOUNDBEGIN|MIDDLE|END` marker system, yielding one
 * {@link CompoundForm} (an array of affix forms, one per part) for every
 * decomposition found.
 *
 * @param word - The word, or on recursive calls the remainder still to split.
 * @param allowNoSuggest - Passed through to every `affixForms` lookup.
 * @param depth - How many parts were split off before `word`; `0` on the
 *   initial call.
 */
function* compoundsByFlags(word, allowNoSuggest = true, depth = 0) {
    const { aff } = word;
    const forbiddenFlags = new Set(aff.COMPOUNDFORBIDFLAG ? [aff.COMPOUNDFORBIDFLAG] : []);
    const permitFlags = new Set(aff.COMPOUNDPERMITFLAG ? [aff.COMPOUNDPERMITFLAG] : []);
    // Once at least one part precedes it, the whole remainder may close the
    // compound as its END part.
    if (depth) {
        const closing = word.shift(CompoundPos.END);
        const closingFlags = new LKFlags({ prefix: permitFlags, forbidden: forbiddenFlags });
        for (const form of affixForms(closing, allowNoSuggest, false, closingFlags)) {
            yield [form];
        }
    }
    // Shorter than two parts of COMPOUNDMIN length each: cannot be split.
    if (word.length < aff.COMPOUNDMIN * 2) {
        return;
    }
    // NOTE(review): compoundsByRules stops at `prev.length >= COMPOUNDWORDMAX`
    // while this bound is a strict `>` on depth — confirm the difference is
    // intended.
    if (aff.COMPOUNDWORDMAX && depth > aff.COMPOUNDWORDMAX) {
        return;
    }
    const partPos = depth ? CompoundPos.MIDDLE : CompoundPos.BEGIN;
    const prefixFlags = partPos === CompoundPos.BEGIN ? new Set() : permitFlags;
    for (let split = aff.COMPOUNDMIN; split <= word.length - aff.COMPOUNDMIN; split += 1) {
        const head = word.slice(0, split);
        head.pos = partPos;
        const tail = word.slice(split);
        tail.pos = partPos;
        const flags = LKFlags.from(prefixFlags, permitFlags, forbiddenFlags);
        // Every decomposition of what follows the split point.
        const continuations = () => compoundsByFlags(tail, allowNoSuggest, depth + 1);
        for (const form of affixForms(head, allowNoSuggest, false, flags)) {
            for (const partial of continuations()) {
                yield [form, ...partial];
            }
        }
        if (!aff.SIMPLIFIEDTRIPLE || head.at(-1) !== tail.at(0)) {
            continue;
        }
        // SIMPLIFIEDTRIPLE: look the head up with its final letter doubled,
        // but report the part with the text as it appears in the input.
        const doubled = head.add(head.at(-1));
        for (const form of affixForms(doubled, allowNoSuggest, false, flags)) {
            for (const partial of continuations()) {
                yield [form.replace({ text: head.word }), ...partial];
            }
        }
    }
}
/**
 * Splits a word into compound parts with the `COMPOUNDRULE` pattern
 * system, yielding one {@link CompoundForm} for every decomposition whose
 * dictionary entries satisfy a rule.
 *
 * @param word - The word, or on recursive calls the remainder still to split.
 * @param prev - Dictionary entries already chosen for the preceding parts.
 * @param rules - Rules still under consideration; when omitted, all of
 *   `aff.COMPOUNDRULE` is used.
 */
function* compoundsByRules(word, prev = [], rules) {
    const { aff, dic } = word;
    const activeRules = rules || [...aff.COMPOUNDRULE];
    // With earlier parts already chosen, the whole remainder may be the
    // final part if some rule matches the complete sequence.
    if (prev.length) {
        for (const entry of dic.homonyms(word.word)) {
            const flagSets = Word.flagSets([...prev, entry]);
            const completesRule = activeRules.some((rule) => rule.match(flagSets));
            if (completesRule) {
                yield [new AffixForm(word)];
            }
        }
    }
    // Shorter than two parts of COMPOUNDMIN length each: cannot be split.
    if (word.length < aff.COMPOUNDMIN * 2) {
        return;
    }
    if (aff.COMPOUNDWORDMAX && prev.length >= aff.COMPOUNDWORDMAX) {
        return;
    }
    for (let split = aff.COMPOUNDMIN; split <= word.length - aff.COMPOUNDMIN; split += 1) {
        const head = word.slice(0, split);
        for (const entry of dic.homonyms(head.word)) {
            const chosen = [...prev, entry];
            const flagSets = Word.flagSets(chosen);
            // Keep only the rules that still accept the sequence so far
            // (the `true` argument presumably requests a partial match —
            // confirm against the rule implementation).
            const viable = activeRules.filter((rule) => rule.match(flagSets, true));
            if (!viable.length) {
                continue;
            }
            for (const tail of compoundsByRules(word.slice(split), chosen, viable)) {
                yield [new AffixForm(head), ...tail];
            }
        }
    }
}
/**
 * Decides whether a {@link CompoundForm} has to be rejected for an
 * {@link LKWord}.
 *
 * The compound is rejected when `FORCEUCASE` applies to its last part but
 * the word's cap type is neither `ALL` nor `INIT`, or when any pair of
 * adjacent parts fails one of these checks:
 *
 * - the left part has the `COMPOUNDFORBIDFLAG` flag in the dictionary;
 * - the two parts separated by a space have an affix form of their own;
 * - `CHECKCOMPOUNDREP`: a `REP` replacement of the joined parts has an affix form;
 * - `CHECKCOMPOUNDTRIPLE`: the boundary forms a letter triplet;
 * - `CHECKCOMPOUNDCASE`: an uppercased letter touches the boundary and
 *   neither boundary character is a hyphen;
 * - `CHECKCOMPOUNDPATTERN`: a configured pattern matches the pair;
 * - `CHECKCOMPOUNDDUP`: the last two parts are identical.
 *
 * @param word - The word to validate against.
 * @param compound - The {@link CompoundForm} to check.
 * @returns `true` if the compound must be rejected, `false` otherwise.
 * @see {@link CompoundPattern}
 */
export function isBadCompound(word, compound) {
    const { aff, dic } = word;
    // True when the given text has at least one affix form.
    const hasForm = (text) => any(affixForms(word.to(text)));
    if (aff.FORCEUCASE) {
        const isCapitalized = word.type === CapType.ALL || word.type === CapType.INIT;
        if (!isCapitalized && dic.hasFlag(compound[compound.length - 1].text, aff.FORCEUCASE)) {
            return true;
        }
    }
    // Walk over every pair of neighbouring parts.
    for (let idx = 0; idx < compound.length - 1; idx += 1) {
        const leftParadigm = compound[idx];
        const rightParadigm = compound[idx + 1];
        const left = leftParadigm.text;
        const right = rightParadigm.text;
        if (dic.hasFlag(left, aff.COMPOUNDFORBIDFLAG)) {
            return true;
        }
        if (hasForm(`${left} ${right}`)) {
            return true;
        }
        if (aff.CHECKCOMPOUNDREP) {
            for (const candidate of replchars(left + right, aff.REP)) {
                // Only plain-string candidates are looked up.
                if (typeof candidate === "string" && hasForm(candidate)) {
                    return true;
                }
            }
        }
        if (aff.CHECKCOMPOUNDTRIPLE) {
            const leftHeavy = left.slice(-2) + right.slice(0, 1);
            if (isTriplet(leftHeavy)) {
                return true;
            }
            const rightHeavy = left.slice(-1) + right.slice(0, 2);
            if (isTriplet(rightHeavy)) {
                return true;
            }
        }
        if (aff.CHECKCOMPOUNDCASE) {
            const rightC = right[0];
            const leftC = left[left.length - 1];
            const touchesUppercase = isUppercased(rightC) || isUppercased(leftC);
            if (touchesUppercase && rightC !== "-" && leftC !== "-") {
                return true;
            }
        }
        if (aff.CHECKCOMPOUNDPATTERN.size) {
            for (const pattern of aff.CHECKCOMPOUNDPATTERN) {
                if (pattern.match(leftParadigm, rightParadigm)) {
                    return true;
                }
            }
        }
        // Duplication is only rejected for the final pair.
        if (aff.CHECKCOMPOUNDDUP && left === right && idx === compound.length - 2) {
            return true;
        }
    }
    return false;
}
//# sourceMappingURL=compounds.js.map