@dcoffey/espells
Version:
Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.
136 lines • 4.96 kB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { CONSTANTS as C } from "../constants.js";
import { lowercase } from "../util.js";
import { preciseAffixScore, rootScore, roughAffixScore, ScoresList, scoreThreshold } from "./scores.js";
/**
 * Incrementally builds ngram-based suggestions for a misspelling.
 *
 * Usage is two-phase: call {@link NgramSuggestionBuilder.step} once per
 * candidate dictionary word, then iterate {@link NgramSuggestionBuilder.finish}
 * to obtain the resulting suggestions.
 */
export class NgramSuggestionBuilder {
    constructor(
        /** The misspelling that suggestions are being built for. */
        misspelling,
        /** A set of already known suggestions that should be skipped. */
        known,
        /**
         * Sets the similarity factor.
         *
         * - `5`: The default value.
         * - `0`: Fewer ngram suggestions, but always at least one.
         * - `10`: Maximum value, yields `MAXNGRAMSUGS` number of suggestions.
         */
        maxDiff,
        /**
         * If true, all bad ngram suggestions will be removed, rather than
         * keeping at least one.
         */
        onlyMaxDiff = false,
        /**
         * Produces less suggestions if true, so that phonetic suggestion
         * system isn't skipped due to a large amount of ngram suggestions.
         */
        hasPhonetic = false) {
        // Define each field exactly once, as a plain data property (enumerable,
        // configurable, writable), in declaration order.
        const fields = { misspelling, known, maxDiff, onlyMaxDiff, hasPhonetic };
        for (const [key, value] of Object.entries(fields)) {
            Object.defineProperty(this, key, {
                enumerable: true,
                configurable: true,
                writable: true,
                value
            });
        }
        // Root candidates gathered by `step()` and consumed by `finish()`.
        this.roots = new ScoresList(C.NGRAM_MAX_ROOTS);
    }
    /**
     * Steps the builder forward by providing another {@link Word} to process.
     *
     * Words whose stem length differs from the misspelling's length by more
     * than 4 are ignored. Otherwise the word is recorded in the root list
     * under the best `rootScore` among its stem and its alternative spellings.
     *
     * @param word - The {@link Word} to process.
     */
    step(word) {
        if (Math.abs(word.stem.length - this.misspelling.length) > 4)
            return;
        let score = rootScore(this.misspelling, word.stem);
        if (word.altSpellings?.size) {
            for (const variant of word.altSpellings) {
                score = Math.max(score, rootScore(this.misspelling, variant));
            }
        }
        this.roots.add(score, word);
    }
    /**
     * Finishes the builder and yields the resulting suggestions (as strings).
     *
     * Every collected root contributes its alternative spellings and the forms
     * returned by `root.forms(misspelling)`; only candidates whose
     * `roughAffixScore` is above `scoreThreshold(misspelling)` are kept. The
     * kept candidates are re-scored with `preciseAffixScore` and then passed
     * through {@link NgramSuggestionBuilder.filterGuesses}.
     */
    *finish() {
        const threshold = scoreThreshold(this.misspelling);
        const guesses = new ScoresList(C.NGRAM_MAX_GUESSES);
        for (const [root] of this.roots.finish()) {
            if (root.altSpellings?.size) {
                for (const variant of root.altSpellings) {
                    const lower = lowercase(variant);
                    const score = roughAffixScore(this.misspelling, variant);
                    // An alternative spelling is compared in lowercase, but the
                    // value that gets suggested is the root's stem.
                    if (score > threshold)
                        guesses.add(score, lower, root.stem);
                }
            }
            for (const form of root.forms(this.misspelling)) {
                const lower = lowercase(form);
                const score = roughAffixScore(this.misspelling, form);
                if (score > threshold)
                    guesses.add(score, lower, form);
            }
        }
        // `maxDiff` of 0..10 maps linearly onto a factor of 2..0; a negative
        // `maxDiff` falls back to a factor of 1.
        const fact = this.maxDiff >= 0 ? (10 - this.maxDiff) / 5 : 1;
        // NOTE(review): the second argument to `ScoresList.finish` is defined
        // in scores.js; presumably it makes the list yield best scores first,
        // which `filterGuesses` appears to rely on. Confirm there.
        yield* this.filterGuesses(guesses.finish(([score, compared, real]) => [
            preciseAffixScore(this.misspelling, compared, fact, score, this.hasPhonetic),
            real
        ], true));
    }
    /**
     * Filters out terrible guesses based on their score or if they were already known.
     *
     * Iteration stops at the first guess scoring `<= 1000` once a "very good"
     * guess (`> 1000`) or a "bad" guess (`< -100`) has been seen. A bad guess
     * is only let through when nothing has been yielded yet and `onlyMaxDiff`
     * is false. A guess is skipped when it contains any known suggestion,
     * i.e. it is a known suggestion itself or one with extra affixes.
     *
     * @param guesses - A list of tuples, containing a score and guess, in that order.
     */
    *filterGuesses(guesses) {
        let seen = false;
        let found = 0;
        for (const [score, value] of guesses) {
            if (seen && score <= 1000)
                return;
            if (score > 1000) {
                // A very good suggestion exists: stop before any lesser one.
                seen = true;
            }
            else if (score < -100) {
                // First bad suggestion: give up if something was already
                // yielded or bad suggestions are disallowed; otherwise let
                // this single one through and stop afterwards.
                if (found > 0 || this.onlyMaxDiff)
                    return;
                seen = true;
            }
            // Skip guesses that merely extend an already known suggestion
            // (same containment direction as Hunspell's `strstr(guess, known)`).
            if (!iterate(this.known).some(word => value.includes(word))) {
                found++;
                yield value;
            }
        }
    }
}
//# sourceMappingURL=ngram.js.map