UNPKG

@dcoffey/espells

Version:

Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.

183 lines 7 kB
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { CapType, CompoundPos } from "../constants.js";
import { Word } from "../dic/index.js";
import { replchars } from "../permutations.js";
import { any, isTriplet, isUppercased } from "../util.js";
import { affixForms } from "./affixes.js";
import { AffixForm } from "./forms.js";
import { LKFlags } from "./lk-flags.js";

/**
 * Produces all valid {@link CompoundForm}s for a word.
 *
 * Candidates come from two generators, in this order: the flag-based one
 * (only when `COMPOUNDBEGIN` or `COMPOUNDFLAG` is set) and the rule-based one
 * (only when `COMPOUNDRULE` is non-empty). Every candidate is passed through
 * {@link isBadCompound} and yielded only if it is not rejected.
 *
 * @param word - The word to decompose; its `aff` property supplies the
 *   settings read here.
 * @param allowNoSuggest - Forwarded to the flag-based generator only; the
 *   rule-based generator does not receive it.
 */
export function* compoundForms(word, allowNoSuggest = true) {
    const aff = word.aff;

    // don't even try to decompose a forbidden word
    // TODO: this is incredibly slow, remove this
    if (aff.FORBIDDENWORD) {
        for (const candidate of affixForms(word, true, true)) {
            if (candidate.flags.has(aff.FORBIDDENWORD)) return;
        }
    }

    if (aff.COMPOUNDBEGIN || aff.COMPOUNDFLAG) {
        for (const compound of compoundsByFlags(word, allowNoSuggest)) {
            if (!isBadCompound(word, compound)) {
                yield compound;
            }
        }
    }

    if (aff.COMPOUNDRULE.size) {
        for (const compound of compoundsByRules(word)) {
            if (!isBadCompound(word, compound)) {
                yield compound;
            }
        }
    }
}

/**
 * Takes this word and yields the {@link CompoundForm}s of it using the
 * `COMPOUNDFLAG`/`COMPOUNDBEGIN|MIDDLE|END` marker system.
 *
 * The generator is recursive: at every split position the left part is run
 * through `affixForms` and the right part is handed back to this function
 * with `depth + 1`.
 *
 * @param word - The (remaining) word to split.
 * @param allowNoSuggest - Forwarded unchanged to every `affixForms` call.
 * @param depth - Recursion depth; `0` for the initial call. A non-zero depth
 *   means `word` is itself a candidate for the final part of a compound.
 */
function* compoundsByFlags(word, allowNoSuggest = true, depth = 0) {
    const aff = word.aff;

    // Each set holds at most one flag, and only when that flag is configured.
    const forbiddenFlags = new Set();
    const permitFlags = new Set();
    if (aff.COMPOUNDFORBIDFLAG) forbiddenFlags.add(aff.COMPOUNDFORBIDFLAG);
    if (aff.COMPOUNDPERMITFLAG) permitFlags.add(aff.COMPOUNDPERMITFLAG);

    // Below the top level, the whole remainder may close the compound.
    if (depth) {
        const forms = affixForms(
            word.shift(CompoundPos.END),
            allowNoSuggest,
            false,
            new LKFlags({ prefix: permitFlags, forbidden: forbiddenFlags })
        );
        for (const form of forms) {
            yield [form];
        }
    }

    // Too short to hold two parts of at least COMPOUNDMIN characters each.
    if (word.length < aff.COMPOUNDMIN * 2) return;
    if (aff.COMPOUNDWORDMAX && depth > aff.COMPOUNDWORDMAX) return;

    const compoundpos = depth ? CompoundPos.MIDDLE : CompoundPos.BEGIN;
    const prefixFlags = compoundpos === CompoundPos.BEGIN ? new Set() : permitFlags;

    for (let pos = aff.COMPOUNDMIN; pos < word.length - aff.COMPOUNDMIN + 1; pos++) {
        const beg = word.slice(0, pos);
        beg.pos = compoundpos;
        const rest = word.slice(pos);
        rest.pos = compoundpos;

        const flags = LKFlags.from(prefixFlags, permitFlags, forbiddenFlags);

        for (const form of affixForms(beg, allowNoSuggest, false, flags)) {
            for (const partial of compoundsByFlags(rest, allowNoSuggest, depth + 1)) {
                yield [form, ...partial];
            }
        }

        // With SIMPLIFIEDTRIPLE, also try the left part with its final
        // character doubled, then report the form under the undoubled text.
        if (aff.SIMPLIFIEDTRIPLE && beg.at(-1) === rest.at(0)) {
            const forms = affixForms(beg.add(beg.at(-1)), allowNoSuggest, false, flags);
            for (const form of forms) {
                for (const partial of compoundsByFlags(rest, allowNoSuggest, depth + 1)) {
                    yield [form.replace({ text: beg.word }), ...partial];
                }
            }
        }
    }
}

/**
 * Takes this word and yields the {@link CompoundForm}s of it using the
 * `COMPOUNDRULE` pattern system.
 *
 * @param word - The (remaining) word to split.
 * @param prev - Dictionary homonyms already matched for the parts to the
 *   left of `word`; empty for the initial call.
 * @param rules - The rules still able to match given `prev`; defaults to a
 *   copy of all of `aff.COMPOUNDRULE`.
 */
function* compoundsByRules(word, prev = [], rules) {
    const aff = word.aff;
    const dic = word.dic;

    if (!rules) rules = [...aff.COMPOUNDRULE];

    // With at least one part already matched, the whole remainder may be the
    // final part: one form is yielded per homonym that completes some rule.
    if (prev.length) {
        for (const homonym of dic.homonyms(word.word)) {
            const parts = [...prev, homonym];
            const flagSets = Word.flagSets(parts);
            if (rules.some(rule => rule.match(flagSets))) {
                yield [new AffixForm(word)];
            }
        }
    }

    // Too short to hold two parts of at least COMPOUNDMIN characters each.
    if (word.length < aff.COMPOUNDMIN * 2) return;
    if (aff.COMPOUNDWORDMAX && prev.length >= aff.COMPOUNDWORDMAX) return;

    for (let pos = aff.COMPOUNDMIN; pos < word.length - aff.COMPOUNDMIN + 1; pos++) {
        const beg = word.slice(0, pos);
        for (const homonym of dic.homonyms(beg.word)) {
            const parts = [...prev, homonym];
            const flagSets = Word.flagSets(parts);
            // Keep only the rules that `match` accepts when called with its
            // second argument set to `true`, and recurse with that subset.
            const compoundRules = rules.filter(rule => rule.match(flagSets, true));
            if (compoundRules.length) {
                for (const rest of compoundsByRules(word.slice(pos), parts, compoundRules)) {
                    yield [new AffixForm(beg), ...rest];
                }
            }
        }
    }
}

/**
 * Determines if a {@link CompoundForm} is invalid for a {@link LKWord}, by
 * various criteria.
 *
 * The `FORCEUCASE` check looks only at the last part. All other checks are
 * applied to each pair of adjacent parts, and the first failing check on any
 * pair rejects the whole compound.
 *
 * @param word - The word to validate against.
 * @param compound - The {@link CompoundForm} to check.
 * @returns `true` if the compound should be rejected, `false` otherwise.
 * @see {@link CompoundPattern}
 */
export function isBadCompound(word, compound) {
    const aff = word.aff;
    const dic = word.dic;

    // The last part carries FORCEUCASE, but the word's cap type is neither
    // ALL nor INIT.
    if (aff.FORCEUCASE && word.type !== CapType.ALL && word.type !== CapType.INIT) {
        if (dic.hasFlag(compound[compound.length - 1].text, aff.FORCEUCASE)) {
            return true;
        }
    }

    // Walk every adjacent (left, right) pair of parts.
    return compound.slice(0, -1).some((leftParadigm, idx) => {
        const left = leftParadigm.text;
        const rightParadigm = compound[idx + 1];
        const right = rightParadigm.text;

        if (dic.hasFlag(left, aff.COMPOUNDFORBIDFLAG)) {
            return true;
        }

        // Reject when the two parts joined by a space produce any affix form.
        if (any(affixForms(word.to(`${left} ${right}`)))) {
            return true;
        }

        // Reject when a REP-table replacement of the joined pair produces any
        // affix form; non-string candidates from `replchars` are skipped.
        if (aff.CHECKCOMPOUNDREP) {
            for (const candidate of replchars(left + right, aff.REP)) {
                if (typeof candidate !== "string") continue;
                if (any(affixForms(word.to(candidate)))) {
                    return true;
                }
            }
        }

        // Test the two three-character windows that straddle the boundary.
        if (aff.CHECKCOMPOUNDTRIPLE) {
            if (
                isTriplet(`${left.slice(-2)}${right.slice(0, 1)}`) ||
                isTriplet(`${left.slice(-1)}${right.slice(0, 2)}`)
            ) {
                return true;
            }
        }

        // Reject an uppercase character on either side of the boundary,
        // unless one of the two boundary characters is a hyphen.
        if (aff.CHECKCOMPOUNDCASE) {
            const rightC = right[0];
            const leftC = left[left.length - 1];
            if ((isUppercased(rightC) || isUppercased(leftC)) && rightC !== "-" && leftC !== "-") {
                return true;
            }
        }

        if (aff.CHECKCOMPOUNDPATTERN.size) {
            for (const pattern of aff.CHECKCOMPOUNDPATTERN) {
                if (pattern.match(leftParadigm, rightParadigm)) {
                    return true;
                }
            }
        }

        // Only the final pair is checked for duplication.
        if (aff.CHECKCOMPOUNDDUP) {
            if (left === right && idx === compound.length - 2) {
                return true;
            }
        }

        return false;
    });
}
//# sourceMappingURL=compounds.js.map