@dcoffey/espells
Version:
Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.
601 lines • 21 kB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { CONSTANTS as C } from "../constants.js";
import { Trie } from "../trie.js";
import { escapeRegExp, re, reverse, split } from "../util.js";
import { Prefix, Suffix } from "./affix.js";
import { Casing, GermanCasing, TurkicCasing } from "./casing.js";
import { CompoundPattern } from "./compound-pattern.js";
import { CompoundRule } from "./compound-rule.js";
import { ConvTable } from "./conv-table.js";
import { PhonetTable } from "./phonet-table.js";
import { RepPattern } from "./rep-pattern.js";
/**
 * A resolved and parsed representation of the data found in a Hunspell `.aff` file.
 *
 * Each supported directive is stored as an own, enumerable property named
 * after the directive (e.g. `aff.KEEPCASE`, `aff.PFX`). In addition, the
 * constructor derives three helper properties from the parsed data:
 * `casing`, `prefixesIndex` and `suffixesIndex`.
 */
export class Aff {
    /**
     * Reads directives from `reader` until it is exhausted, then derives
     * the casing strategy and the affix lookup indexes.
     *
     * @param reader - The {@link Reader} instance to parse with.
     * @param override - An optional object which allows for overriding
     * whatever {@link AffData} that was parsed with something else.
     */
    constructor(reader, override) {
        // check AffData for descriptions of these properties
        //
        // [directive name, default value], in definition order. Collections
        // are created here so that every instance gets its own copy.
        const defaults = [
            ["SET", "UTF-8"], // unused
            ["FLAG", "short"],
            ["LANG", undefined],
            ["WORDCHARS", undefined], // unused
            ["IGNORE", undefined],
            ["CHECKSHARPS", false],
            ["FORBIDDENWORD", undefined],
            ["KEY", "qwertyuiop|asdfghjkl|zxcvbnm"],
            ["TRY", ""],
            ["NOSUGGEST", undefined],
            ["KEEPCASE", undefined],
            ["REP", new Set()],
            ["MAP", new Set()],
            ["NOSPLITSUGS", false],
            ["PHONE", undefined],
            ["MAXCPDSUGS", 3],
            ["MAXNGRAMSUGS", 4],
            ["MAXDIFF", 1],
            ["ONLYMAXDIFF", false],
            ["PFX", new Map()],
            ["SFX", new Map()],
            ["NEEDAFFIX", undefined],
            ["CIRCUMFIX", undefined],
            ["COMPLEXPREFIXES", false],
            ["FULLSTRIP", false],
            ["BREAK", C.DEFAULT_BREAK],
            ["COMPOUNDRULE", new Set()],
            ["COMPOUNDMIN", 3],
            ["COMPOUNDWORDMAX", undefined],
            ["COMPOUNDFLAG", undefined],
            ["COMPOUNDBEGIN", undefined],
            ["COMPOUNDMIDDLE", undefined],
            ["COMPOUNDEND", undefined],
            ["ONLYINCOMPOUND", undefined],
            ["COMPOUNDPERMITFLAG", undefined],
            ["COMPOUNDFORBIDFLAG", undefined],
            ["FORCEUCASE", undefined],
            ["CHECKCOMPOUNDCASE", false],
            ["CHECKCOMPOUNDDUP", false],
            ["CHECKCOMPOUNDREP", false],
            ["CHECKCOMPOUNDTRIPLE", false],
            ["CHECKCOMPOUNDPATTERN", new Set()],
            ["SIMPLIFIEDTRIPLE", false],
            ["COMPOUNDSYLLABLE", undefined], // unused
            ["COMPOUNDMORESUFFIXES", false], // unused
            ["COMPOUNDROOT", undefined], // unused
            ["ICONV", undefined],
            ["OCONV", undefined],
            ["AF", []],
            ["AM", []],
            ["WARN", undefined],
            ["FORBIDWARN", false],
            ["SYLLABLENUM", undefined], // unused
            ["SUBSTANDARD", undefined], // unused
        ];
        for (const [name, value] of defaults) {
            Object.defineProperty(this, name, {
                enumerable: true,
                configurable: true,
                writable: true,
                value,
            });
        }
        // Shared reader for the PFX and SFX tables, which have the same
        // layout: a header `<flag> <crossproduct> <count>` followed by
        // `count` rule lines.
        const readAffixes = (AffixClass, table, [flag, crossproduct, count]) => {
            reader.for(parseInt(count, 10), line => {
                const [, , strip, add, cond = ""] = split(line);
                const affix = new AffixClass(flag, crossproduct, strip, add, cond, this);
                const set = table.get(flag) ?? new Set();
                table.set(flag, set.add(affix));
            });
        };
        do {
            if (reader.done)
                break;
            let [directive, ...args] = split(reader.line);
            // skip directive if it doesn't seem real
            if (!/^[A-Z]+$/.test(directive))
                continue;
            // Call through Object.prototype so the lookup does not rely on
            // the table inheriting (and not shadowing) `hasOwnProperty`.
            if (Object.prototype.hasOwnProperty.call(C.SYNONYMS, directive))
                directive = C.SYNONYMS[directive];
            switch (directive) {
                case "SET":
                case "KEY":
                case "TRY":
                case "WORDCHARS":
                case "LANG": {
                    // A directive without a value keeps the default rather
                    // than replacing it with `undefined`.
                    if (args[0] !== undefined) {
                        this[directive] = args[0];
                    }
                    break;
                }
                case "FLAG": {
                    if (args[0] === "short" ||
                        args[0] === "long" ||
                        args[0] === "numeric" ||
                        args[0] === "UTF-8") {
                        this.FLAG = args[0];
                    }
                    // try alternatives
                    else if (args[0] === "num") {
                        this.FLAG = "numeric";
                    }
                    break;
                }
                case "IGNORE": {
                    // Spread iterates by code point, so characters outside
                    // the BMP are not broken into lone surrogates.
                    this.IGNORE = new Set(args.flatMap(str => [...str]));
                    break;
                }
                case "MAXDIFF":
                case "MAXNGRAMSUGS":
                case "MAXCPDSUGS":
                case "COMPOUNDMIN":
                case "COMPOUNDWORDMAX": {
                    const value = parseInt(args[0], 10);
                    // A missing or malformed number keeps the default
                    // instead of storing NaN.
                    if (!Number.isNaN(value)) {
                        this[directive] = value;
                    }
                    break;
                }
                case "NOSUGGEST":
                case "KEEPCASE":
                case "CIRCUMFIX":
                case "NEEDAFFIX":
                case "FORBIDDENWORD":
                case "WARN":
                case "COMPOUNDFLAG":
                case "COMPOUNDBEGIN":
                case "COMPOUNDMIDDLE":
                case "COMPOUNDEND":
                case "ONLYINCOMPOUND":
                case "COMPOUNDPERMITFLAG":
                case "COMPOUNDFORBIDFLAG":
                case "FORCEUCASE":
                case "SUBSTANDARD":
                case "SYLLABLENUM":
                case "COMPOUNDROOT": {
                    this[directive] = this.parseFlag(args[0]);
                    break;
                }
                case "COMPLEXPREFIXES":
                case "FULLSTRIP":
                case "NOSPLITSUGS":
                case "CHECKSHARPS":
                case "CHECKCOMPOUNDCASE":
                case "CHECKCOMPOUNDDUP":
                case "CHECKCOMPOUNDREP":
                case "CHECKCOMPOUNDTRIPLE":
                case "SIMPLIFIEDTRIPLE":
                case "ONLYMAXDIFF":
                case "COMPOUNDMORESUFFIXES":
                case "FORBIDWARN": {
                    // Presence of the directive is what enables the option.
                    this[directive] = true;
                    break;
                }
                case "BREAK": {
                    // An explicit BREAK table replaces the default set entirely.
                    this.BREAK = new Set();
                    reader.for(parseInt(args[0], 10), line => {
                        let [, pattern] = split(line);
                        // Escape the pattern, then restore `^` and `$` so
                        // they keep acting as anchors.
                        pattern = escapeRegExp(pattern).replaceAll("\\^", "^").replaceAll("\\$", "$");
                        // Unanchored patterns must not match at the very
                        // start or the very end of the word.
                        if (!pattern.startsWith("^") && !pattern.endsWith("$")) {
                            pattern = `(?!^)${pattern}(?=.)`;
                        }
                        this.BREAK.add(re `/${pattern}/g`);
                    });
                    break;
                }
                case "COMPOUNDRULE": {
                    reader.for(parseInt(args[0], 10), line => {
                        const [, value] = split(line);
                        this.COMPOUNDRULE.add(new CompoundRule(value, this));
                    });
                    break;
                }
                case "ICONV":
                case "OCONV": {
                    const pairs = [];
                    reader.for(parseInt(args[0], 10), line => {
                        const [, pattern, replacement] = split(line);
                        pairs.push([pattern, replacement]);
                    });
                    this[directive] = new ConvTable(pairs);
                    break;
                }
                case "REP": {
                    reader.for(parseInt(args[0], 10), line => {
                        const [, pattern, replacement] = split(line);
                        this.REP.add(new RepPattern(pattern, replacement));
                    });
                    break;
                }
                case "MAP": {
                    reader.for(parseInt(args[0], 10), line => {
                        const [, value] = split(line);
                        // Each entry is either a parenthesised group or a
                        // single character. The `u` flag makes `.` consume
                        // a whole code point rather than half a surrogate pair.
                        this.MAP.add(iterate(value.matchAll(/\(.*?\)|./gu))
                            .map(match => match[0].replaceAll(/^\(|\)$/g, ""))
                            .toSet());
                    });
                    break;
                }
                // TODO: PFX, SFX data fields (see morph fixture)
                case "PFX": {
                    readAffixes(Prefix, this.PFX, args);
                    break;
                }
                case "SFX": {
                    readAffixes(Suffix, this.SFX, args);
                    break;
                }
                case "CHECKCOMPOUNDPATTERN": {
                    reader.for(parseInt(args[0], 10), line => {
                        const [, left, right, replacement] = split(line);
                        this.CHECKCOMPOUNDPATTERN.add(new CompoundPattern(left, right, replacement));
                    });
                    break;
                }
                case "AF": {
                    reader.for(parseInt(args[0], 10), line => {
                        const [, value] = split(line);
                        this.AF.push(this.parseFlags(value));
                    });
                    break;
                }
                case "AM": {
                    reader.for(parseInt(args[0], 10), line => {
                        const [, value] = split(line);
                        this.AM.push(new Set(value.split("")));
                    });
                    break;
                }
                case "COMPOUNDSYLLABLE": {
                    this.COMPOUNDSYLLABLE = [parseInt(args[0], 10), args[1]];
                    break;
                }
                case "PHONE": {
                    const rules = [];
                    reader.for(parseInt(args[0], 10), line => {
                        const [, search, replacement] = split(line);
                        rules.push([search, replacement]);
                    });
                    this.PHONE = new PhonetTable(rules);
                    break;
                }
            }
        } while (reader.next());
        // Pick the casing strategy: CHECKSHARPS takes precedence over LANG.
        if (this.CHECKSHARPS) {
            this.casing = new GermanCasing();
        }
        else if (this.LANG && ["tr", "tr_TR", "az", "crh"].includes(this.LANG)) {
            this.casing = new TurkicCasing();
        }
        else {
            this.casing = new Casing();
        }
        // Index prefixes by the text they add. The callback receives the
        // entry already stored under that key, if any, and returns the new one.
        this.prefixesIndex = new Trie();
        for (const [, prefixes] of this.PFX) {
            for (const prefix of prefixes) {
                this.prefixesIndex.add(prefix.add, set => !set ? new Set([prefix]) : set.add(prefix));
            }
        }
        // Suffixes are indexed by their reversed text.
        this.suffixesIndex = new Trie();
        for (const [, suffixes] of this.SFX) {
            for (const suffix of suffixes) {
                this.suffixesIndex.add(reverse(suffix.add), set => !set ? new Set([suffix]) : set.add(suffix));
            }
        }
        // NOTE: overrides are applied last, so `casing` and the two indexes
        // above were derived from the parsed data, not from the overrides.
        if (override)
            Object.assign(this, override);
    }
    /**
     * Parses a string and returns the first {@link Flag} found.
     *
     * @param flag - The raw flag text, as accepted by {@link Aff.parseFlags}.
     * @returns The first parsed flag, or `undefined` if none was found.
     */
    parseFlag(flag) {
        return [...this.parseFlags(flag)][0];
    }
    /**
     * Parses a string and returns the {@link Flags} found.
     *
     * If `AF` aliases are defined and an entry resolves to an alias index
     * (1-based), the aliased flag set is used. Otherwise the entry is
     * decoded according to the current `FLAG` mode.
     *
     * @param flags - A string, or an array of strings, of raw flag text.
     * `undefined` and `null` yield an empty set.
     * @returns A set of the individual flags.
     */
    parseFlags(flags) {
        // A directive with no argument passes `undefined` here.
        if (flags == null)
            return new Set();
        const entries = typeof flags === "string" ? [flags] : flags;
        const result = entries.flatMap(flag => {
            const alias = this.AF.length ? this.AF[parseInt(flag, 10) - 1] : undefined;
            if (alias) {
                return [...alias];
            }
            // prettier-ignore
            switch (this.FLAG) {
                case "UTF-8":
                case "short": return [...flag];
                case "long": return C.FLAG_LONG_REGEX.exec(flag)?.slice(1) ?? [];
                case "numeric": return flag.split(",");
                // Unknown mode: contribute nothing rather than `undefined`.
                default: return [];
            }
        });
        return new Set(result);
    }
    /**
     * Utility for handling special cases involving the `CHECKSHARPS`
     * directive. Returns false if the directive itself is disabled, but
     * otherwise will determine if the given word contains a `ß`.
     *
     * @param word - The word to check.
     */
    isSharps(word) {
        if (!this.CHECKSHARPS)
            return false;
        return word.includes("ß");
    }
    /**
     * Utility for applying the {@link Aff.IGNORE} transformation to a string.
     * Does nothing if the `IGNORE` directive isn't present.
     *
     * @param str - The string to transform.
     * @returns The string with every ignored character removed.
     */
    ignore(str) {
        if (this.IGNORE) {
            for (const ch of this.IGNORE) {
                str = str.replaceAll(ch, "");
            }
        }
        return str;
    }
}
//# sourceMappingURL=index.js.map