UNPKG

@dcoffey/espells

Version:

Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.

178 lines 6.63 kB
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { PriorityList } from "../plist.js";
import { commonCharacters, lcslen, leftCommonSubstring, lowercase, ngram } from "../util.js";

/**
 * Special type of {@link PriorityList} that is intended to handle
 * suggestion scores. Stores arrays of values with a score number, and
 * limits the amount of entries within the list by removing the lowest
 * scoring values.
 *
 * Every stored entry has the shape `[score, ...values]`.
 *
 * @typeParam T - The array value to be stored as an entry.
 */
export class ScoresList {
    /**
     * @param max - The maximum number of entries in the list.
     */
    constructor(max) {
        /** The maximum number of entries in the list. */
        Object.defineProperty(this, "max", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: max
        });
        /** The internal list. */
        Object.defineProperty(this, "list", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: new PriorityList(ScoresList.heapCmp)
        });
    }

    /**
     * Adds an entry to the list.
     *
     * While the list holds fewer than `max` entries every entry is
     * accepted. Once it is full, an entry is only accepted if its score is
     * at least the score of the entry returned by `peek()`, and one entry
     * is popped afterwards to bring the list back to `max` entries.
     *
     * @param score - The score of the entry being added.
     * @param args - The entry to add.
     */
    add(score, ...args) {
        // NOTE(review): presumably `peek()` yields the lowest-scoring entry
        // (the heap is ordered by `heapCmp`, ascending) - confirm in plist.js.
        const lowest = this.list.peek();
        // Without this check a low-scoring entry was dropped even though
        // the list still had room, making the result order-dependent.
        const hasRoom = this.list.length < this.max;
        if (hasRoom || !lowest || score >= lowest[0]) {
            this.list.push([score, ...args]);
            if (this.list.length > this.max) {
                this.list.pop();
            }
        }
    }

    /**
     * Finalizes the list, returning its entries sorted from the highest
     * score to the lowest.
     *
     * @param map - Optional function applied to every `[score, ...values]`
     *   entry before sorting. Its result must still carry the score at
     *   index 0, because the sort reads that position.
     * @param keepScores - If truthy, the returned entries keep their
     *   leading score. Otherwise the score is stripped from each entry.
     */
    finish(map, keepScores) {
        const entries = map
            ? iterate(this.list.data).map(map).toArray()
            : [...this.list.data];
        entries.sort(ScoresList.finishCmp);
        return keepScores ? entries : entries.map(([, ...out]) => out);
    }
}

/** The comparator function that is used for the {@link PriorityList} instance. */
Object.defineProperty(ScoresList, "heapCmp", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: (a, b) => a[0] - b[0]
});

/**
 * The comparator function that is used when finalizing the list, which
 * requires a sort of the list.
 */
Object.defineProperty(ScoresList, "finishCmp", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: (a, b) => b[0] - a[0]
});

/**
 * Simple scoring algorithm used for determining if a potential suggestion
 * is a good one for the misspelling given.
 *
 * @param misspelling - The misspelled word.
 * @param suggestion - The potential suggestion to determine the score of.
 *   It is lowercased before being compared.
 */
export function rootScore(misspelling, suggestion) {
    return (
        ngram(3, misspelling, lowercase(suggestion), false, false, true) +
        leftCommonSubstring(misspelling, lowercase(suggestion))
    );
}

/**
 * Simple scoring algorithm used for sorting a list of suggestions from
 * closest matching to least matching.
 *
 * @param misspelling - The misspelled word.
 * @param suggestion - The suggestion to determine the score of.
 */
export function finalScore(misspelling, suggestion) {
    return (
        2 * lcslen(misspelling, suggestion) -
        Math.abs(misspelling.length - suggestion.length) +
        leftCommonSubstring(misspelling, suggestion)
    );
}

/**
 * Finds a minimum threshold for a decent suggestion.
 *
 * The word is compared against three "mangled" copies of itself, in which
 * every fourth character (starting at offsets 1, 2 and 3) is replaced by
 * `*`. The three n-gram scores are averaged and lowered by one.
 *
 * @param word - The word (or misspelling) to have a threshold generated for.
 */
export function scoreThreshold(word) {
    let threshold = 0;
    for (let startPos = 1; startPos < 4; startPos++) {
        // Start from the word's own characters. Starting from an empty
        // array left holes that `join` renders as empty strings, so the
        // mangled word consisted of nothing but asterisks.
        const mangled = word.split("");
        for (let pos = startPos; pos < word.length; pos += 4) {
            mangled[pos] = "*";
        }
        threshold += ngram(word.length, word, mangled.join(""), false, true);
    }
    // Average of the three passes, minus one (not a division by `3 - 1`).
    return Math.floor(threshold / 3) - 1;
}

/**
 * Simple and rough estimation of score for an affixed form.
 *
 * @param misspelling - The misspelled word.
 * @param suggestion - The suggestion to determine the score of.
 * @see {@link preciseAffixScore}
 */
export function roughAffixScore(misspelling, suggestion) {
    return (
        ngram(misspelling.length, misspelling, suggestion, false, true) +
        leftCommonSubstring(misspelling, suggestion)
    );
}

/**
 * Precise, mildly expensive (in comparison) scoring algorithm for affixed
 * forms. This function tends to generate three groups:
 *
 * - 1000 or more: The misspelling and suggestion are the same with the only
 *   exception being casing (the code returns `base + 2000` in this case).
 * - -100 or less: The word difference is too great, as determined by
 *   `diffFactor` argument.
 * - -100...1000: Normal suggestion scores.
 *
 * @param misspelling - The misspelled word.
 * @param suggestion - The suggestion to determine the score of.
 * @param diffFactor - An adjustment knob for changing the number of
 *   suggestions returned. A lower factor means that a suggestion must be
 *   of a decent confidence to actually be given to the user.
 * @param base - The initial score between the misspelling and the suggestion.
 * @param hasPhonetic - If true, this indicates that the spellchecker also
 *   has access to a {@link PhonetTable}. This causes the scores to be adjusted
 *   slightly lower so that the {@link PhonetTable} is more "important".
 */
export function preciseAffixScore(misspelling, suggestion, diffFactor, base, hasPhonetic) {
    const lcs = lcslen(misspelling, suggestion);

    // Same length and a common subsequence covering the whole word:
    // treated as the top class of suggestion.
    if (misspelling.length === suggestion.length && misspelling.length === lcs) {
        return base + 2000;
    }

    let result = 2 * lcs - Math.abs(misspelling.length - suggestion.length);
    result += leftCommonSubstring(misspelling, suggestion);
    if (commonCharacters(misspelling, lowercase(suggestion))) {
        result++;
    }
    result += ngram(4, misspelling, suggestion, false, true);

    // Bigram similarity is measured in both directions and summed.
    const bigrams =
        ngram(2, misspelling, suggestion, true, true) +
        ngram(2, suggestion, misspelling, true, true);
    result += bigrams;

    // With a phonetic table only the misspelling's length sets the limit,
    // otherwise the combined length of both words does.
    const questionableLimit = hasPhonetic
        ? misspelling.length * diffFactor
        : (misspelling.length + suggestion.length) * diffFactor;

    // Too few shared bigrams: push the score into the "too different" group.
    if (bigrams < questionableLimit) {
        result -= 1000;
    }
    return result;
}
//# sourceMappingURL=scores.js.map