@dcoffey/espells
Version:
Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.
178 lines • 6.63 kB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { PriorityList } from "../plist.js";
import { commonCharacters, lcslen, leftCommonSubstring, lowercase, ngram } from "../util.js";
/**
 * Bounded, score-ordered collection used to gather suggestion candidates.
 *
 * Each entry is stored as an array `[score, ...values]` inside a
 * {@link PriorityList}. Once more than `max` entries are held, the entry
 * at the head of the internal list (the lowest scoring one, given
 * {@link ScoresList.heapCmp}) is discarded.
 *
 * @typeParam T - The array value to be stored as an entry.
 */
export class ScoresList {
  /**
   * @param max - The maximum number of entries in the list.
   */
  constructor(max) {
    /** The maximum number of entries in the list. */
    Object.defineProperty(this, "max", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: max,
    });
    /** The internal list. */
    Object.defineProperty(this, "list", {
      enumerable: true,
      configurable: true,
      writable: true,
      value: new PriorityList(ScoresList.heapCmp),
    });
  }

  /**
   * Adds an entry to the list.
   *
   * While the list holds fewer than `max` entries every entry is accepted.
   * Once it is full, an entry is only accepted when its score is at least
   * the score of the entry returned by `peek()`, and the head entry is then
   * popped to keep the size at `max`.
   *
   * @param score - The score of the entry being added.
   * @param args - The entry to add.
   */
  add(score, ...args) {
    // NOTE(review): assumes `peek()` yields the entry `heapCmp` orders
    // first, i.e. the lowest score currently stored.
    const lowest = this.list.peek();
    const hasRoom = this.list.length < this.max;
    // Previously a score below the current minimum was rejected even when
    // the list was not full, silently losing valid candidates.
    if (hasRoom || !lowest || score >= lowest[0]) {
      this.list.push([score, ...args]);
      if (this.list.length > this.max) {
        this.list.pop();
      }
    }
  }

  /**
   * Produces the final array of entries, sorted from highest to lowest score.
   *
   * @param map - Optional function applied to every stored
   *   `[score, ...values]` entry before sorting. Its result must still be
   *   an array whose first element is the score.
   * @param keepScores - If truthy, the returned entries keep their leading
   *   score; otherwise the score is stripped and only the values remain.
   * @returns A new array; the internal list is not modified.
   */
  finish(map, keepScores) {
    const entries = map
      ? iterate(this.list.data).map(map).toArray()
      : [...this.list.data];
    entries.sort(ScoresList.finishCmp);
    return keepScores ? entries : entries.map(([, ...out]) => out);
  }
}
/** The comparator function that is used for the {@link PriorityList} instance. */
Object.defineProperty(ScoresList, "heapCmp", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: (a, b) => a[0] - b[0],
});
/**
 * The comparator function that is used when finalizing the list, which
 * requires a sort of the list (descending by score).
 */
Object.defineProperty(ScoresList, "finishCmp", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: (a, b) => b[0] - a[0],
});
/**
 * Quick score telling how promising a dictionary root is as a suggestion
 * for the given misspelling.
 *
 * The suggestion is lowercased before being compared; the result is the
 * sum of a 3-gram score and the left-common-substring score.
 *
 * @param misspelling - The misspelled word.
 * @param suggestion - The potential suggestion to determine the score of.
 * @returns The combined score.
 */
export function rootScore(misspelling, suggestion) {
  const trigramScore = ngram(3, misspelling, lowercase(suggestion), false, false, true);
  const prefixScore = leftCommonSubstring(misspelling, lowercase(suggestion));
  return trigramScore + prefixScore;
}
/**
 * Score used to order an already-chosen list of suggestions from the
 * closest match to the furthest.
 *
 * Computed as twice the `lcslen` of both words, minus the absolute
 * difference of their lengths, plus their `leftCommonSubstring` score.
 *
 * @param misspelling - The misspelled word.
 * @param suggestion - The suggestion to determine the score of.
 * @returns The combined score.
 */
export function finalScore(misspelling, suggestion) {
  const subsequenceScore = 2 * lcslen(misspelling, suggestion);
  const lengthPenalty = Math.abs(misspelling.length - suggestion.length);
  const prefixScore = leftCommonSubstring(misspelling, suggestion);
  return subsequenceScore - lengthPenalty + prefixScore;
}
/**
 * Finds a minimum threshold for a decent suggestion.
 *
 * The word is "mangled" in three different ways: starting at offset 1, 2
 * and 3, every fourth character is replaced by `*`. Each mangled variant
 * is scored against the original word with `ngram`, the three scores are
 * averaged, and one is subtracted from the (floored) average.
 *
 * @param word - The word (or misspelling) to have a threshold generated for.
 * @returns The threshold score.
 */
export function scoreThreshold(word) {
  let threshold = 0;
  for (let startPos = 1; startPos < 4; startPos++) {
    // Start from a copy of the word so that only the selected positions are
    // masked. Starting from an empty array would make `join` drop every
    // other character and leave nothing but asterisks.
    // `split("")` keeps indices aligned with `word.length` (UTF-16 units).
    const mangled = word.split("");
    for (let pos = startPos; pos < word.length; pos += 4) {
      mangled[pos] = "*";
    }
    threshold += ngram(word.length, word, mangled.join(""), false, true);
  }
  // Average over the three mangled variants, then lower the bar by one.
  return Math.floor(threshold / 3) - 1;
}
/**
 * Cheap, approximate score for an affixed form.
 *
 * Adds an `ngram` score (with the n-gram size set to the misspelling's
 * length) to the `leftCommonSubstring` score of the two words.
 *
 * @param misspelling - The misspelled word.
 * @param suggestion - The suggestion to determine the score of.
 * @returns The combined score.
 * @see {@link preciseAffixScore}
 */
export function roughAffixScore(misspelling, suggestion) {
  const ngramScore = ngram(misspelling.length, misspelling, suggestion, false, true);
  const prefixScore = leftCommonSubstring(misspelling, suggestion);
  return ngramScore + prefixScore;
}
/**
 * Precise, mildly expensive (in comparison) scoring algorithm for affixed
 * forms. This function tends to generate three groups:
 *
 * - 1000 or more: The misspelling and suggestion have the same length and
 *   their longest common subsequence covers that whole length (in practice:
 *   the same word, differing at most in casing). `base + 2000` is returned.
 * - -100 or less: The word difference is too great, as determined by the
 *   `diffFactor` argument (a penalty of 1000 is subtracted).
 * - -100...1000: Normal suggestion scores.
 *
 * @param misspelling - The misspelled word.
 * @param suggestion - The suggestion to determine the score of.
 * @param diffFactor - An adjustment knob for changing the number of
 *   suggestions returned. It scales the "questionable" limit that the
 *   bigram score must reach; a factor of 0 disables the penalty unless the
 *   bigram score is negative.
 * @param base - The initial score between the misspelling and the
 *   suggestion. Only used for the "same word" shortcut.
 * @param hasPhonetic - If true, this indicates that the spellchecker also
 *   has access to a {@link PhonetTable}. The questionable limit is then
 *   derived from the suggestion's length alone instead of the combined
 *   length of both words.
 * @returns The score of the suggestion.
 */
export function preciseAffixScore(misspelling, suggestion, diffFactor, base, hasPhonetic) {
  const lcs = lcslen(misspelling, suggestion);
  if (misspelling.length === suggestion.length && misspelling.length === lcs) {
    return base + 2000;
  }

  // Longest common subsequence, penalized by the difference in length.
  let result = 2 * lcs - Math.abs(misspelling.length - suggestion.length);
  result += leftCommonSubstring(misspelling, suggestion);
  if (commonCharacters(misspelling, lowercase(suggestion))) {
    result++;
  }
  result += ngram(4, misspelling, suggestion, false, true);

  // Bigram similarity measured in both directions; also used below to
  // judge whether the suggestion is too different to be trusted.
  const bigrams =
    ngram(2, misspelling, suggestion, true, true) +
    ngram(2, suggestion, misspelling, true, true);
  result += bigrams;

  // With phonetic rules the limit depends on the *suggestion's* length
  // only, matching Hunspell's `ngsuggest` and Spylls' `precise_affix`.
  const limitLength = hasPhonetic
    ? suggestion.length
    : misspelling.length + suggestion.length;
  const questionableLimit = limitLength * diffFactor;
  if (bigrams < questionableLimit) {
    result -= 1000;
  }
  return result;
}
//# sourceMappingURL=scores.js.map