UNPKG

@dcoffey/espells

Version:

Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.

136 lines 4.96 kB
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { CONSTANTS as C } from "../constants.js";
import { lowercase } from "../util.js";
import { preciseAffixScore, rootScore, roughAffixScore, ScoresList, scoreThreshold } from "./scores.js";

/**
 * Incrementally builds ngram-based suggestions for a misspelling.
 *
 * Usage is two-phase: feed every candidate dictionary word to {@link step},
 * then iterate {@link finish} to obtain the resulting suggestion strings.
 */
export class NgramSuggestionBuilder {
  constructor(
    /** The misspelling that suggestions are being built for. */
    misspelling,
    /** A set of already known suggestions that should be skipped. */
    known,
    /**
     * Sets the similarity factor.
     *
     * - `5`: The default value.
     * - `0`: Fewer ngram suggestions, but always at least one.
     * - `10`: Maximum value, yields `MAXNGRAMSUGS` number of suggestions.
     */
    maxDiff,
    /**
     * If true, all bad ngram suggestions will be removed, rather than
     * keeping at least one.
     */
    onlyMaxDiff = false,
    /**
     * Produces less suggestions if true, so that phonetic suggestion
     * system isn't skipped due to a large amount of ngram suggestions.
     */
    hasPhonetic = false
  ) {
    this.misspelling = misspelling;
    this.known = known;
    this.maxDiff = maxDiff;
    this.onlyMaxDiff = onlyMaxDiff;
    this.hasPhonetic = hasPhonetic;
    // Scored collection of candidate root words, created with the
    // `NGRAM_MAX_ROOTS` constant as its constructor argument.
    this.roots = new ScoresList(C.NGRAM_MAX_ROOTS);
  }

  /**
   * Steps the builder forward by providing another {@link Word} to process.
   *
   * @param word - The {@link Word} to process.
   */
  step(word) {
    // Stems whose length differs from the misspelling by more than four
    // characters are never considered as roots.
    if (Math.abs(word.stem.length - this.misspelling.length) > 4) {
      return;
    }

    // A word is scored by the best match among its stem and any of its
    // alternative spellings.
    let score = rootScore(this.misspelling, word.stem);
    if (word.altSpellings?.size) {
      for (const variant of word.altSpellings) {
        score = Math.max(score, rootScore(this.misspelling, variant));
      }
    }

    this.roots.add(score, word);
  }

  /** Finishes the builder and yields the resulting suggestions (as strings). */
  *finish() {
    const threshold = scoreThreshold(this.misspelling);
    const guesses = new ScoresList(C.NGRAM_MAX_GUESSES);

    for (const [root] of this.roots.finish()) {
      // Alternative spellings are compared in lowercased form, but the
      // suggestion they produce is the root's stem.
      if (root.altSpellings?.size) {
        for (const variant of root.altSpellings) {
          const lower = lowercase(variant);
          const score = roughAffixScore(this.misspelling, variant);
          if (score > threshold) {
            guesses.add(score, lower, root.stem);
          }
        }
      }

      // Forms produced by the root for this misspelling are compared in
      // lowercased form and suggested as-is.
      for (const form of root.forms(this.misspelling)) {
        const lower = lowercase(form);
        const score = roughAffixScore(this.misspelling, form);
        if (score > threshold) {
          guesses.add(score, lower, form);
        }
      }
    }

    // A negative (or missing) `maxDiff` falls back to a neutral factor of 1.
    const fact = this.maxDiff >= 0 ? (10 - this.maxDiff) / 5 : 1;

    // Replace each rough score with the precise one, keeping only the
    // real suggestion string alongside it.
    const rescored = guesses.finish(
      ([score, compared, real]) => [
        preciseAffixScore(this.misspelling, compared, fact, score, this.hasPhonetic),
        real
      ],
      true
    );

    yield* this.filterGuesses(rescored);
  }

  /**
   * Filters out terrible guesses based on their score or if they were already known.
   *
   * @param guesses - A list of tuples, containing a score and guess, in that order.
   */
  *filterGuesses(guesses) {
    let seen = false;
    let found = 0;

    for (const [score, value] of guesses) {
      // Once a decisive guess has been seen, stop at the first guess
      // that is not above the 1000 mark.
      if (seen && score <= 1000) {
        return;
      }

      if (score > 1000) {
        seen = true;
      } else if (score < -100) {
        // Very poor guess: only let it through when nothing has been
        // yielded yet and `onlyMaxDiff` is off, then stop afterwards.
        if (found > 0 || this.onlyMaxDiff) {
          return;
        }
        seen = true;
      }

      // Skip a guess that contains an already known suggestion (i.e. the
      // known suggestion itself, or that suggestion with extra affixes).
      // This matches the direction of the substring test used by the
      // Hunspell/Spylls ngram suggestion filter.
      const isKnown = iterate(this.known).some(word => value.includes(word));
      if (!isKnown) {
        found++;
        yield value;
      }
    }
  }
}
//# sourceMappingURL=ngram.js.map