@dcoffey/espells
Version:
Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.
290 lines • 13.4 kB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { CapType, CONSTANTS as C, GOOD_EDITS, SuggestionKind } from "../constants.js";
import { badchar, badcharkey, doubletwochars, extrachar, forgotchar, longswapchar, mapchars, movechar, replchars, swapchar, twowords } from "../permutations.js";
import { intersect, limit, lowercase, uppercase } from "../util.js";
import { NgramSuggestionBuilder } from "./ngram.js";
import { PhonetSuggestionBuilder } from "./phonet.js";
import { MultiWordSuggestion, Suggestion } from "./suggestion.js";
export class Suggest {
/**
* @param aff - {@link Aff} data to use.
* @param dic - {@link Dic} data to use.
* @param lookup - {@link Lookup} instance to use for filtering and
* verifying suggestions.
*/
constructor(aff, dic, lookup) {
this.aff = aff;
this.dic = dic;
this.lookup = lookup;
const badFlags = iterate([aff.FORBIDDENWORD, aff.NOSUGGEST, aff.ONLYINCOMPOUND])
.filter(flag => Boolean(flag))
.toSet();
this.ngramWords = iterate(dic.words)
.filter(word => (!word.flags ? true : intersect(word.flags, badFlags).size === 0))
.toSet();
// TODO: fix this - this is dumb but this is legit how Hunspell does it
this.dashes = aff.TRY.includes("-") || aff.TRY.includes("a");
}
/**
* Yields {@link Suggestion} instances for the given word, even if it is
* spelled correctly.
*
* @param word - The word to get the suggestions for.
*/
*suggestions(word) {
const handled = new Set();
const [captype, ...variants] = this.aff.casing.corrections(word);
const handle = (suggestion, checkInclusion = false) => this.handle(word, captype, handled, suggestion, checkInclusion);
if (this.aff.FORCEUCASE && captype === CapType.NO) {
for (const capitalized of this.aff.casing.capitalize(word)) {
if (this.correct(capitalized)) {
const suggestion = handle(new Suggestion(capitalized, SuggestionKind.FORCEUCASE));
if (suggestion)
yield suggestion;
return;
}
}
}
let goodEditsFound = false;
for (let idx = 0; idx < variants.length; idx++) {
const variant = variants[idx];
if (idx > 0 && this.correct(variant)) {
const suggestion = handle(new Suggestion(variant, SuggestionKind.CASE));
if (suggestion)
yield suggestion;
}
let noCompound = false;
for (const suggestion of this.edits(variant, handle, C.MAX_SUGGESTIONS, false)) {
yield suggestion;
goodEditsFound || (goodEditsFound = GOOD_EDITS.includes(suggestion.kind));
// prettier-ignore
switch (suggestion.kind) {
case SuggestionKind.UPPERCASE:
case SuggestionKind.REPLCHARS:
case SuggestionKind.MAPCHARS: {
noCompound = true;
break;
}
case SuggestionKind.SPACEWORD: return;
}
}
if (!noCompound) {
for (const suggestion of this.edits(word, handle, this.aff.MAXCPDSUGS, true)) {
yield suggestion;
goodEditsFound || (goodEditsFound = GOOD_EDITS.includes(suggestion.kind));
}
}
if (goodEditsFound)
return;
if (word.includes("-") && !iterate(handled).some(word => word.includes("-"))) {
const chunks = word.split("-");
for (let idx = 0; idx < chunks.length; idx++) {
const chunk = chunks[idx];
if (!this.correct(chunk)) {
for (const suggestion of this.suggestions(chunk)) {
const candidate = [
...chunks.slice(0, idx),
suggestion.text,
...chunks.slice(idx + 1)
].join("-");
if (this.lookup.check(candidate)) {
yield new Suggestion(candidate, SuggestionKind.DASHES);
}
}
}
}
}
if (this.aff.MAXNGRAMSUGS || this.aff.PHONE) {
const ngram = this.aff.MAXNGRAMSUGS ? this.ngramBuilder(word, handled) : null;
const phonet = this.aff.PHONE ? this.phonetBuilder(word) : null;
for (const word of this.ngramWords) {
if (ngram)
ngram.step(word);
if (phonet)
phonet.step(word);
}
if (ngram) {
yield* iterate(ngram.finish())
.take(this.aff.MAXNGRAMSUGS)
.map(suggestion => handle(new Suggestion(suggestion, SuggestionKind.NGRAM), true))
.filter(suggestion => suggestion !== undefined);
}
if (phonet) {
yield* iterate(phonet.finish())
.take(C.MAX_PHONET_SUGGESTIONS)
.map(suggestion => handle(new Suggestion(suggestion, SuggestionKind.PHONET)))
.filter(suggestion => suggestion !== undefined);
}
}
}
}
/**
* Yields various correct {@link Suggestion} instances that were found by
* transforming the given word using various simple "edit" functions.
* e.g. this may involve breaking the word apart at various points,
* shifting characters around, removing characters, etc.
*
* @param word - The word to apply the "edits" transformations to.
* @param handle - The {@link Handler} instance to yield every {@link Suggestion} to.
* @param limit - The maximum number of correct {@link Suggestion}
* instances to yield.
* @param compounds - If provided, false will yield only suggestions
* found from {@link AffixForm}s, and true will yield only suggestions
* found from {@link CompoundForm}s.
*/
*edits(word, handle, limit, compounds) {
yield* iterate(this.filter(this.permutations(word), compounds))
.map(suggestion => handle(suggestion))
.filter(suggestion => suggestion !== undefined)
.take(limit);
}
/**
* Filters {@link Suggestion} and {@link MultiWordSuggestion} instances.
* This process involves splitting out the {@link MultiWordSuggestion}
* instances into a few forms, and making sure that every
* {@link Suggestion} that will be yielded is correct.
*
* @param suggestions - The iterator that should have its resultant
* suggestions filtered.
* @param compounds - If provided, false will yield only suggestions
* found from {@link AffixForm}s, and true will yield only suggestions
* found from {@link CompoundForm}s.
*/
*filter(suggestions, compounds) {
for (const suggestion of suggestions) {
if (suggestion instanceof MultiWordSuggestion) {
if (suggestion.words.every(word => this.correct(word, compounds))) {
yield suggestion.stringify();
if (suggestion.allowDash)
yield suggestion.stringify("-");
}
}
else if (this.correct(suggestion.text, compounds)) {
yield suggestion;
}
}
}
// -- MISC.
/**
* Base function that a {@link Handler} can be made from.
*
* @param word - The word to compare the given {@link Suggestion} to.
* @param captype - The {@link CapType} of the word.
* @param handled - The set of already handled words and stems.
* @param suggestion - The {@link Suggestion} to handle.
* @param checkInclusion - If true, the {@link Suggestion} text will be
* checked for if it can be found in its entirety inside of any the
* previously handled suggestions. Not just in the set, but if it can
* be found even as a substring. Defaults to false.
*/
handle(word, captype, handled, suggestion, checkInclusion = false) {
let text = suggestion.text;
if (!this.dic.hasFlag(text, this.aff.KEEPCASE) || this.aff.isSharps(text)) {
text = this.aff.casing.coerce(text, captype);
// revert if forbidden
if (text !== suggestion.text && this.lookup.isForbidden(text)) {
text = suggestion.text;
}
if (captype === CapType.HUH || captype === CapType.HUHINIT) {
const pos = text.indexOf(" ");
if (pos !== -1) {
if (text[pos + 1] !== word[pos] && uppercase(text[pos + 1]) === word[pos]) {
text = text.slice(0, pos + 1) + word[pos] + word.slice(pos + 2);
}
}
}
}
if (this.lookup.isForbidden(text))
return;
if (this.aff.OCONV)
text = this.aff.OCONV.match(text);
if (handled.has(text))
return;
if (checkInclusion &&
iterate(handled).some(prev => lowercase(text).includes(lowercase(prev)))) {
return;
}
handled.add(text);
return suggestion.replace(text);
}
/**
* Yields every permutation (as {@link Suggestion} or
* {@link MultiWordSuggestion} instances) of a word processed through
* *many* different simple transformation functions.
*
* @param word - The word to yield the permutations of.
*/
*permutations(word) {
yield new Suggestion(this.aff.casing.upper(word), SuggestionKind.UPPERCASE);
for (const suggestion of limit(replchars(word, this.aff.REP), C.MAX_PERMUTATIONS)) {
if (Array.isArray(suggestion)) {
yield new Suggestion(suggestion.join(" "), SuggestionKind.REPLCHARS);
yield new MultiWordSuggestion(suggestion, SuggestionKind.REPLCHARS, false);
}
else {
yield new Suggestion(suggestion, SuggestionKind.REPLCHARS);
}
}
for (const words of limit(twowords(word), C.MAX_PERMUTATIONS)) {
yield new Suggestion(words.join(" "), SuggestionKind.SPACEWORD);
if (this.dashes)
yield new Suggestion(words.join("-"), SuggestionKind.SPACEWORD);
}
// prettier-ignore
{
yield* this.pmtFrom(mapchars(word, this.aff.MAP), SuggestionKind.MAPCHARS);
yield* this.pmtFrom(swapchar(word), SuggestionKind.SWAPCHAR);
yield* this.pmtFrom(longswapchar(word), SuggestionKind.LONGSWAPCHAR);
yield* this.pmtFrom(badcharkey(word, this.aff.KEY), SuggestionKind.BADCHARKEY);
yield* this.pmtFrom(extrachar(word), SuggestionKind.EXTRACHAR);
yield* this.pmtFrom(forgotchar(word, this.aff.TRY), SuggestionKind.FORGOTCHAR);
yield* this.pmtFrom(movechar(word), SuggestionKind.MOVECHAR);
yield* this.pmtFrom(badchar(word, this.aff.TRY), SuggestionKind.BADCHAR);
yield* this.pmtFrom(doubletwochars(word), SuggestionKind.DOUBLETWOCHARS);
}
if (!this.aff.NOSPLITSUGS) {
for (const suggestionPair of limit(twowords(word), C.MAX_PERMUTATIONS)) {
yield new MultiWordSuggestion(suggestionPair, SuggestionKind.TWOWORDS, this.dashes);
}
}
}
// -- UTILITY
/**
* Helper for checking if a word is correct using some preconfigured
* settings specific to the {@link Suggest} class.
*/
correct(word, compounds) {
if (compounds !== undefined) {
return this.lookup.correct(word, {
caps: false,
allowNoSuggest: false,
affixForms: !compounds,
compoundForms: !!compounds
});
}
else {
return this.lookup.correct(word, { allowNoSuggest: false, caps: false });
}
}
/**
* Helper for yielding {@link Suggestion} instances from a iterator that
* yields strings.
*/
*pmtFrom(iter, kind) {
for (const suggestion of limit(iter, C.MAX_PERMUTATIONS)) {
yield new Suggestion(suggestion, kind);
}
}
/** Returns a preconfigured {@link NgramSuggestionBuilder}. */
ngramBuilder(word, handled) {
return new NgramSuggestionBuilder(lowercase(word), iterate(handled).map(lowercase).toSet(), this.aff.MAXDIFF, this.aff.ONLYMAXDIFF, Boolean(this.aff.PHONE));
}
/** Yields a preconfigured {@link PhonetSuggestionBuilder}. */
phonetBuilder(word) {
return new PhonetSuggestionBuilder(word, this.aff.PHONE);
}
}
//# sourceMappingURL=index.js.map