UNPKG

@dcoffey/espells

Version:

Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.

146 lines 6.05 kB
/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at https://mozilla.org/MPL/2.0/. */ import { iterate } from "iterare"; import { RepPattern } from "../aff/rep-pattern.js"; import { CONSTANTS as C } from "../constants.js"; import { split } from "../util.js"; /** A word as found in a {@link Dic} instance's index. */ export class Word { /** * @param line - The line from a `.dic` file to parse. Can also just be * treated as a "word" argument. * @param aff - {@link Aff} data to use. */ constructor(line, aff) { Object.defineProperty(this, "aff", { enumerable: true, configurable: true, writable: true, value: aff }); // a hack so that the split regex doesn't get confused with escapes line = line.replaceAll("\\/", "_||_"); const match = C.SPLIT_WORD_REGEX.exec(line); if (!match) throw new SyntaxError(`Invalid line in dictionary '${line}'`); let [, stem, flags, data] = match; stem = stem.replaceAll("_||_", "/"); stem = aff.ignore(stem); this.stem = stem; this.capType = aff.casing.guess(stem); if (flags) this.flags = aff.parseFlags(flags); if (data) { for (const keyvalue of split(data)) { const match = C.SPLIT_DATA_REGEX.exec(keyvalue); // key:value pair if (match) { const [, key, value] = match; this.data ?? (this.data = new Map()); const set = this.data.get(value) ?? new Set(); this.data.set(key, set.add(value)); // ph: misspellings if (key === "ph") { // pretty ph:prit* -> rep(prit, prett) if (value.endsWith("*")) { aff.REP.add(new RepPattern(value.slice(0, -2), stem.slice(0, -1))); } // happy ph:hepi->happi -> rep(hepi, happi) else if (value.includes("->")) { const [from, to] = value.split("->"); aff.REP.add(new RepPattern(from, to)); } // wednesday ph:wensday -> rep(wensday, wednesday) // and altSpelling added for ngram suggestions else { aff.REP.add(new RepPattern(value, stem)); this.altSpellings ?? (this.altSpellings = new Set()); this.altSpellings.add(value); } } } // morphology alias else if (/^\d+$/.test(keyvalue) && aff.AM[parseInt(keyvalue) - 1]) { for (const str in aff.AM[parseInt(keyvalue) - 1]) { this.data ?? (this.data = new Map()); const set = this.data.get(keyvalue) ?? new Set(); this.data.set(keyvalue, set.add(str)); } } } } } /** * Determines if this word has the given flag. * * @param flag - The flag to check for. Can be undefined, which will return false. */ has(flag) { if (flag === undefined) return false; return this.flags?.has(flag) ?? false; } /** * Returns the forms (permutations) of this {@link Word}, with all valid * suffixes and prefixes. * * @param similarTo - The string/word that the forms found should be similar to. */ forms(similarTo) { const res = [this.stem]; if (!this.flags) return res; const affixes = { prefixes: iterate(this.flags) .filter(flag => this.aff.PFX.has(flag)) .map(flag => this.aff.PFX.get(flag)) .flatten() .filter(prefix => prefix.relevant(this.stem)) .toSet(), suffixes: iterate(this.flags) .filter(flag => this.aff.SFX.has(flag)) .map(flag => this.aff.SFX.get(flag)) .flatten() .filter(suffix => suffix.relevant(this.stem)) .toSet(), }; const suffixes = iterate(affixes.suffixes) .filter(suffix => (similarTo ? similarTo.endsWith(suffix.add) : true)) .toArray(); const prefixes = iterate(affixes.prefixes) .filter(prefix => (similarTo ? similarTo.startsWith(prefix.add) : true)) .toArray(); const cross = iterate(prefixes) .map(prefix => iterate(suffixes) .filter(suffix => suffix.crossproduct && prefix.crossproduct) .map(suffix => [prefix, suffix]) .toArray()) .flatten() .toArray(); for (const suffix of suffixes) { const root = suffix.strip ? this.stem.slice(0, -suffix.strip.length) : this.stem; res.push(root + suffix.add); } for (const [prefix, suffix] of cross) { const root = suffix.strip ? this.stem.slice(prefix.strip.length, -suffix.strip.length) : this.stem.slice(prefix.strip.length); res.push(prefix.add + root + suffix.add); } for (const prefix of prefixes) { const root = this.stem.slice(prefix.strip.length); res.push(prefix.add + root); } return res; } /** * Utility function for generating a {@link FlagSet} from an array of words. * * @param words - The words to generate the flag set from. */ static flagSets(words) { return new Set(words.filter(word => word.flags).map(word => new Set(word.flags))); } } //# sourceMappingURL=word.js.map