UNPKG

@dcoffey/espells

Version:

Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.

601 lines 21 kB
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { CONSTANTS as C } from "../constants.js";
import { Trie } from "../trie.js";
import { escapeRegExp, re, reverse, split } from "../util.js";
import { Prefix, Suffix } from "./affix.js";
import { Casing, GermanCasing, TurkicCasing } from "./casing.js";
import { CompoundPattern } from "./compound-pattern.js";
import { CompoundRule } from "./compound-rule.js";
import { ConvTable } from "./conv-table.js";
import { PhonetTable } from "./phonet-table.js";
import { RepPattern } from "./rep-pattern.js";

/**
 * Parses a base-10 integer out of a directive argument.
 * Yields `NaN` when the argument is missing or not numeric.
 */
const toInt = text => Number.parseInt(text, 10);

/** A resolved and parsed representation of the data found in a Hunspell `.aff` file. */
export class Aff {
    /**
     * @param reader - The {@link Reader} instance to parse with.
     * @param override - An optional object which allows for overriding
     *   whatever {@link AffData} that was parsed with something else.
     */
    constructor(reader, override) {
        // Default value of every directive; check AffData for descriptions
        // of these properties. The table is rebuilt for each instance so
        // that the Set/Map/Array defaults are never shared between instances.
        const defaults = {
            SET: "UTF-8", // unused
            FLAG: "short",
            LANG: undefined,
            WORDCHARS: undefined, // unused
            IGNORE: undefined,
            CHECKSHARPS: false,
            FORBIDDENWORD: undefined,
            KEY: "qwertyuiop|asdfghjkl|zxcvbnm",
            TRY: "",
            NOSUGGEST: undefined,
            KEEPCASE: undefined,
            REP: new Set(),
            MAP: new Set(),
            NOSPLITSUGS: false,
            PHONE: undefined,
            MAXCPDSUGS: 3,
            MAXNGRAMSUGS: 4,
            MAXDIFF: 1,
            ONLYMAXDIFF: false,
            PFX: new Map(),
            SFX: new Map(),
            NEEDAFFIX: undefined,
            CIRCUMFIX: undefined,
            COMPLEXPREFIXES: false,
            FULLSTRIP: false,
            BREAK: C.DEFAULT_BREAK,
            COMPOUNDRULE: new Set(),
            COMPOUNDMIN: 3,
            COMPOUNDWORDMAX: undefined,
            COMPOUNDFLAG: undefined,
            COMPOUNDBEGIN: undefined,
            COMPOUNDMIDDLE: undefined,
            COMPOUNDEND: undefined,
            ONLYINCOMPOUND: undefined,
            COMPOUNDPERMITFLAG: undefined,
            COMPOUNDFORBIDFLAG: undefined,
            FORCEUCASE: undefined,
            CHECKCOMPOUNDCASE: false,
            CHECKCOMPOUNDDUP: false,
            CHECKCOMPOUNDREP: false,
            CHECKCOMPOUNDTRIPLE: false,
            CHECKCOMPOUNDPATTERN: new Set(),
            SIMPLIFIEDTRIPLE: false,
            COMPOUNDSYLLABLE: undefined, // unused
            COMPOUNDMORESUFFIXES: false, // unused
            COMPOUNDROOT: undefined, // unused
            ICONV: undefined,
            OCONV: undefined,
            AF: [],
            AM: [],
            WARN: undefined,
            FORBIDWARN: false,
            SYLLABLENUM: undefined, // unused
            SUBSTANDARD: undefined, // unused
        };
        // Same descriptor and same definition order as individually
        // declared class fields would produce.
        for (const [name, value] of Object.entries(defaults)) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }

        do {
            if (reader.done) break;
            let [directive, ...args] = split(reader.line);
            // skip directive if it doesn't seem real
            if (!/^[A-Z]+$/.test(directive)) continue;
            // map alternative spellings onto their canonical directive name
            if (Object.prototype.hasOwnProperty.call(C.SYNONYMS, directive)) {
                directive = C.SYNONYMS[directive];
            }
            switch (directive) {
                // plain string values
                case "SET":
                case "KEY":
                case "TRY":
                case "WORDCHARS":
                case "LANG": {
                    this[directive] = args[0];
                    break;
                }
                case "FLAG": {
                    const [mode] = args;
                    if (mode === "short" || mode === "long" || mode === "numeric" || mode === "UTF-8") {
                        this.FLAG = mode;
                    }
                    // try alternatives
                    else if (mode === "num") {
                        this.FLAG = "numeric";
                    }
                    break;
                }
                case "IGNORE": {
                    // Split by code point, not UTF-16 unit: a lone surrogate in
                    // this set would make `ignore()` strip halves of unrelated
                    // astral characters.
                    this.IGNORE = new Set(args.flatMap(str => [...str]));
                    break;
                }
                // numeric values
                case "MAXDIFF":
                case "MAXNGRAMSUGS":
                case "MAXCPDSUGS":
                case "COMPOUNDMIN":
                case "COMPOUNDWORDMAX": {
                    this[directive] = toInt(args[0]);
                    break;
                }
                // single-flag values
                case "NOSUGGEST":
                case "KEEPCASE":
                case "CIRCUMFIX":
                case "NEEDAFFIX":
                case "FORBIDDENWORD":
                case "WARN":
                case "COMPOUNDFLAG":
                case "COMPOUNDBEGIN":
                case "COMPOUNDMIDDLE":
                case "COMPOUNDEND":
                case "ONLYINCOMPOUND":
                case "COMPOUNDPERMITFLAG":
                case "COMPOUNDFORBIDFLAG":
                case "FORCEUCASE":
                case "SUBSTANDARD":
                case "SYLLABLENUM":
                case "COMPOUNDROOT": {
                    this[directive] = this.parseFlag(args[0]);
                    break;
                }
                // boolean switches: being present turns them on
                case "COMPLEXPREFIXES":
                case "FULLSTRIP":
                case "NOSPLITSUGS":
                case "CHECKSHARPS":
                case "CHECKCOMPOUNDCASE":
                case "CHECKCOMPOUNDDUP":
                case "CHECKCOMPOUNDREP":
                case "CHECKCOMPOUNDTRIPLE":
                case "SIMPLIFIEDTRIPLE":
                case "ONLYMAXDIFF":
                case "COMPOUNDMORESUFFIXES":
                case "FORBIDWARN": {
                    this[directive] = true;
                    break;
                }
                case "BREAK": {
                    // an explicit BREAK table replaces the default set entirely
                    this.BREAK = new Set();
                    reader.for(toInt(args[0]), line => {
                        let [, pattern] = split(line);
                        // escape everything except the `^` / `$` anchors
                        pattern = escapeRegExp(pattern).replaceAll("\\^", "^").replaceAll("\\$", "$");
                        // un-anchored patterns must not match at either end of the word
                        if (!pattern.startsWith("^") && !pattern.endsWith("$")) {
                            pattern = `(?!^)${pattern}(?=.)`;
                        }
                        this.BREAK.add(re`/${pattern}/g`);
                    });
                    break;
                }
                case "COMPOUNDRULE": {
                    reader.for(toInt(args[0]), line => {
                        const [, value] = split(line);
                        this.COMPOUNDRULE.add(new CompoundRule(value, this));
                    });
                    break;
                }
                case "ICONV":
                case "OCONV": {
                    const pairs = [];
                    reader.for(toInt(args[0]), line => {
                        const [, pattern, replacement] = split(line);
                        pairs.push([pattern, replacement]);
                    });
                    this[directive] = new ConvTable(pairs);
                    break;
                }
                case "REP": {
                    reader.for(toInt(args[0]), line => {
                        const [, pattern, replacement] = split(line);
                        this.REP.add(new RepPattern(pattern, replacement));
                    });
                    break;
                }
                case "MAP": {
                    reader.for(toInt(args[0]), line => {
                        const [, value] = split(line);
                        // each member is either one character or a
                        // parenthesised group, stored without its parentheses
                        this.MAP.add(iterate(value.matchAll(/\(.*?\)|./g))
                            .map(match => match[0].replaceAll(/^\(|\)$/g, ""))
                            .toSet());
                    });
                    break;
                }
                // TODO: PFX, SFX data fields (see morph fixture)
                case "PFX":
                case "SFX": {
                    const AffixClass = directive === "PFX" ? Prefix : Suffix;
                    const table = this[directive];
                    const [flag, crossproduct, count] = args;
                    reader.for(toInt(count), line => {
                        const [, , strip, add, cond = ""] = split(line);
                        const affix = new AffixClass(flag, crossproduct, strip, add, cond, this);
                        const group = table.get(flag) ?? new Set();
                        table.set(flag, group.add(affix));
                    });
                    break;
                }
                case "CHECKCOMPOUNDPATTERN": {
                    reader.for(toInt(args[0]), line => {
                        const [, left, right, replacement] = split(line);
                        this.CHECKCOMPOUNDPATTERN.add(new CompoundPattern(left, right, replacement));
                    });
                    break;
                }
                case "AF": {
                    // Collect the table locally and publish it afterwards:
                    // `parseFlags` resolves aliases through `this.AF`, so pushing
                    // entries one by one would let a later definition such as
                    // "1,5" be mistaken for a reference to alias #1.
                    const aliases = [];
                    reader.for(toInt(args[0]), line => {
                        const [, value] = split(line);
                        aliases.push(this.parseFlags(value));
                    });
                    this.AF.push(...aliases);
                    break;
                }
                case "AM": {
                    reader.for(toInt(args[0]), line => {
                        const [, value] = split(line);
                        this.AM.push(new Set(value.split("")));
                    });
                    break;
                }
                case "COMPOUNDSYLLABLE": {
                    this.COMPOUNDSYLLABLE = [toInt(args[0]), args[1]];
                    break;
                }
                case "PHONE": {
                    const rules = [];
                    reader.for(toInt(args[0]), line => {
                        const [, search, replacement] = split(line);
                        rules.push([search, replacement]);
                    });
                    this.PHONE = new PhonetTable(rules);
                    break;
                }
            }
        } while (reader.next());

        // choose the casing implementation from the parsed directives
        if (this.CHECKSHARPS) {
            this.casing = new GermanCasing();
        }
        else if (this.LANG && ["tr", "tr_TR", "az", "crh"].includes(this.LANG)) {
            this.casing = new TurkicCasing();
        }
        else {
            this.casing = new Casing();
        }

        // index prefixes by the text they add
        this.prefixesIndex = new Trie();
        for (const [, prefixes] of this.PFX) {
            for (const prefix of prefixes) {
                this.prefixesIndex.add(prefix.add, set => (!set ? new Set([prefix]) : set.add(prefix)));
            }
        }

        // index suffixes by the reverse of the text they add
        this.suffixesIndex = new Trie();
        for (const [, suffixes] of this.SFX) {
            for (const suffix of suffixes) {
                this.suffixesIndex.add(reverse(suffix.add), set => (!set ? new Set([suffix]) : set.add(suffix)));
            }
        }

        // overrides are applied last, on top of everything computed above
        if (override) Object.assign(this, override);
    }

    /**
     * Parses a string and returns the first {@link Flag} found.
     *
     * @param flag - The raw flag text.
     * @returns The first flag, or `undefined` if none was found.
     */
    parseFlag(flag) {
        const [first] = this.parseFlags(flag);
        return first;
    }

    /**
     * Parses a string and returns the {@link Flags} found.
     *
     * @param flags - A raw flag string, or an array of them. A missing
     *   (`null` / `undefined`) value yields an empty set rather than throwing.
     * @returns The set of flags found across every input string.
     */
    parseFlags(flags) {
        // a flag directive written without an argument ends up here
        if (flags == null) return new Set();
        const inputs = typeof flags === "string" ? [flags] : flags;
        const result = inputs.flatMap(flag => {
            // a numeric value that points at a defined `AF` entry is an alias
            const alias = this.AF.length ? this.AF[toInt(flag) - 1] : undefined;
            if (alias) return [...alias];
            // prettier-ignore
            switch (this.FLAG) {
                case "long": return C.FLAG_LONG_REGEX.exec(flag)?.slice(1) ?? [];
                case "numeric": return flag.split(",");
                // "short" is the default mode, so anything unrecognised
                // is handled the same way: one flag per character
                case "UTF-8":
                case "short":
                default: return [...flag];
            }
        });
        return new Set(result);
    }

    /**
     * Utility for handling special cases involving the `CHECKSHARPS`
     * directive. Returns false if the directive itself is disabled, but
     * otherwise will determine if the given word contains a `ß`.
     *
     * @param word - The word to check.
     */
    isSharps(word) {
        return Boolean(this.CHECKSHARPS) && word.includes("ß");
    }

    /**
     * Utility for applying the {@link Aff.IGNORE} transformation to a string.
     * Does nothing if the `IGNORE` directive isn't present.
     *
     * @param str - The string to transform.
     */
    ignore(str) {
        if (!this.IGNORE) return str;
        let result = str;
        for (const ch of this.IGNORE) {
            result = result.replaceAll(ch, "");
        }
        return result;
    }
}
//# sourceMappingURL=index.js.map