UNPKG

@dcoffey/espells

Version:

Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.

157 lines 6.27 kB
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { Aff } from "./aff/index.js";
import { decoder } from "./constants.js";
import { Dic, Word } from "./dic/index.js";
import { Lookup } from "./lookup/index.js";
import { Reader } from "./reader.js";
import { Suggest } from "./suggest/index.js";
import { concat } from "./util.js";

/**
 * Espells spellchecker. Instances of this class are fully instantiated
 * when created and do not need any special init functions called.
 */
export class Espells {
  /**
   * Builds the affix data, the dictionary, and the lookup/suggest helpers.
   *
   * @param opts - Initialization options.
   * @param opts.aff - Source of the `.aff` file, handed to a `Reader`.
   * @param opts.dic - A dictionary source, or an array of them. Each entry
   *   is either a string or a value that `decoder.decode` accepts.
   * @param opts.override - Passed through unchanged as the second argument
   *   of the `Aff` constructor.
   */
  constructor({ aff, dic, override }) {
    const sources = Array.isArray(dic) ? dic : [dic];

    // Concatenate every dictionary into a single source.
    // TODO: is the sorting an issue?
    let merged = "";
    for (const source of sources) {
      const text = typeof source === "string" ? source : decoder.decode(source);
      // Without a line break between sources, the last entry of a dictionary
      // that lacks a trailing newline would fuse with the first line of the
      // next one.
      if (text !== "" && merged !== "" && !merged.endsWith("\n")) {
        merged += "\n";
      }
      merged += text;
    }

    this.aff = new Aff(new Reader(aff), override);
    this.dic = new Dic(new Reader(merged), this.aff);
    this.lookuper = new Lookup(this.aff, this.dic);
    this.suggester = new Suggest(this.aff, this.dic, this.lookuper);
  }

  /**
   * Creates an {@link Espells} instance from URL strings rather than raw
   * sources. Uses fetch, assumes the given URL can be decoded into a
   * string of text.
   *
   * @param opts - Same options as the constructor, except that `aff` is a
   *   URL and `dic` is a URL or a list of URLs. Any other properties are
   *   forwarded to the constructor untouched.
   * @returns A promise resolving to the constructed instance.
   */
  static async fromURL(opts) {
    const fetchText = async url => (await fetch(url)).text();
    const dicURLs = typeof opts.dic === "string" ? [opts.dic] : opts.dic;

    // The downloads are independent of each other, so run them concurrently.
    // Promise.all keeps the results in request order, so the dictionaries
    // are still concatenated in the order they were given.
    const [aff, ...dic] = await Promise.all([
      fetchText(opts.aff),
      ...Array.from(dicURLs, url => fetchText(url)),
    ]);

    return new Espells({ ...opts, aff, dic });
  }

  /**
   * Adds a dictionary (or array of dictionaries) to the current instance.
   *
   * @param dic - The dictionary (or dictionaries) to add.
   */
  addDictionary(dic) {
    const inputs = Array.isArray(dic) ? dic : [dic];
    for (const input of inputs) {
      this.dic.addDictionary(new Reader(input));
    }
  }

  /**
   * Adds a word to the spellchecker's dictionary.
   *
   * @param stem - The stem to add; it is wrapped in a `Word` built against
   *   this instance's affix data before being stored.
   */
  add(stem) {
    this.dic.add(new Word(stem, this.aff));
  }

  /**
   * Removes a word from the spellchecker's dictionary.
   *
   * @param stem - The stem to remove.
   */
  remove(stem) {
    this.dic.remove(stem);
  }

  /**
   * Determines if a word meets three different criteria:
   *
   * - If the word is spelled correctly
   * - If the word has been marked as forbidden
   * - If the word has been marked as `WARN`
   *
   * These are the `correct`, `forbidden`, and `warn` properties of the
   * returned object, respectively.
   *
   * @param word - The word to check.
   * @param caseSensitive - If true, the spellchecker will consider the
   *   capitalization of the word given. Defaults to true.
   */
  lookup(word, caseSensitive = true) {
    return this.lookuper.check(word, caseSensitive);
  }

  /**
   * Returns suggestions for a word, even if it isn't misspelled.
   *
   * @param word - The word to get the suggestions of.
   * @param max - The maximum number of suggestions to return. Defaults to 8.
   * @returns An array holding the `text` of each suggestion.
   */
  suggest(word, max = 8) {
    return iterate(this.suggester.suggestions(word))
      .take(max)
      .map(suggestion => suggestion.text)
      .toArray();
  }

  /**
   * Returns the stems for a word, which are all of the potential "base
   * forms" of a word, which will have various suffixes or prefixes
   * attached to that base to make the given word.
   *
   * If the word given is misspelled, the array of stems returned will just
   * be empty.
   *
   * @param word - The word to get the stems of.
   * @param caseSensitive - If true, the spellchecker will consider the
   *   capitalization of the word given. Defaults to true.
   */
  stems(word, caseSensitive = true) {
    // Run the spelling check in the same mode as the stem lookup, so the
    // two steps cannot disagree about whether the word is acceptable.
    if (!this.lookup(word, caseSensitive).correct) {
      return [];
    }
    return iterate(this.lookuper.stems(word, { caps: caseSensitive })).toArray();
  }

  /**
   * Returns the "morphological data" for a word. This data is basically
   * just a map of keys and values, representing some sort of metadata
   * attached to a stem. e.g. a potential key-value could be `is:gendered`
   * (for some languages), which could be checked like:
   *
   * ```ts
   * const gendered =
   *   espells.data("word").get("is")?.has("gendered") ?? false
   * ```
   *
   * The reason for there being a `Set` assigned to a key is because you
   * could have multiple values under the `"is"` key, like `is:x`, `is:y`, etc.
   *
   * It should also be noted that this function takes care to get every
   * stem of the word, and then merge the morphological data for every
   * stem, which is what is finally returned. If you want to take more care
   * than that, and get only the data attached to a specific stem, you
   * could use the {@link Espells.stems} function first, and use one of the
   * stems it returns.
   *
   * The last detail to mention is that if the word is misspelled, the map
   * returned will just be empty.
   *
   * @param word - The word to get the data of.
   * @param caseSensitive - If true, the spellchecker will consider the
   *   capitalization of the word given. Defaults to true.
   */
  data(word, caseSensitive = true) {
    // process:
    // * check if the word is correct (in the same mode as the lookups below)
    // * get the stems of the word
    // * get the data maps for every homonym of the stems
    // * reduce every map into a single, merged map without overwriting anything
    if (!this.lookup(word, caseSensitive).correct) {
      return new Map();
    }

    return iterate(this.lookuper.stems(word, { caps: caseSensitive }))
      .map(stem => this.lookuper.data(stem, caseSensitive))
      .reduce((acc, cur) => {
        // NOTE(review): `cur` is presumably a collection of key -> Set maps
        // (one per homonym); flattening yields their [key, set] entries.
        iterate(cur)
          .flatten()
          .forEach(([key, set]) => {
            acc.set(key, concat(acc.get(key) ?? new Set(), set));
          });
        return acc;
      }, new Map());
  }
}
//# sourceMappingURL=index.js.map