@dcoffey/espells
Version:
Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.
146 lines • 5.46 kB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
import { iterate } from "iterare";
import { intersect, product, re } from "../util.js";
// TODO: make this not use a weird regex matching system.
// I couldn't exactly wrap my brain around how to avoid using regex here.
// I'm sure it's not hard, but right now this system is very slow because
// of all the permutations made by the `product(...relevant)` call.
/**
 * {@link Rule} quantifiers.
 *
 * Laid out like a numeric enum: every name maps to its ordinal and every
 * ordinal maps back to its name.
 */
var Quantifier = Quantifier || {};
[
    // Must match the flag.
    "ONE",
    // May optionally match the flag repeatedly. ("*" quantifier)
    "ZERO_OR_MORE",
    // May optionally match the flag. ("?" quantifier)
    "ZERO_OR_ONE",
].forEach((name, ordinal) => {
    Quantifier[name] = ordinal;
    Quantifier[ordinal] = name;
});
/**
* A `RegExp`-like rule for generating compound rules. It is an alternative
* way of specifying compound words to the {@link Aff.COMPOUNDFLAG} (and
* similar) {@link Flag}s. It uses the following syntax:
*
* ```text
* COMPOUNDRULE A*B?CD
* ```
*
* Which should be parsed as: A compound word may consist of zero or more
* words with the {@link Flag} `A`, then optionally a word with the
* {@link Flag} `B`, and then finally the compound must end with a word with
* the {@link Flag} `C` and a word with the {@link Flag} `D`.
*
* The similarity of this to a `RegExp` is exploited by both Spylls and
* Espells. The algorithm used to check for matches involves taking a
* {@link FlagSet} (representing words) and turning it into a string that is
* checked by a `RegExp`.
*/
export class CompoundRule {
    /**
     * Parses `rule` and compiles it into two regular expressions: one that
     * must match a whole compound and one that accepts any valid beginning
     * of a compound.
     *
     * @param rule - The `RegExp`-like syntax to generate this rule.
     * @param aff - The {@link Aff} data to use when parsing flags.
     */
    constructor(rule, aff) {
        /** Every flag that is mentioned somewhere in this rule. */
        this.flags = new Set();
        /** The parsed `{ flag, quantifier }` entries, in rule order. */
        this.rules = parseCompoundRule(rule, aff);
        const parts = [];
        for (const { flag, quantifier } of this.rules) {
            this.flags.add(flag);
            // A flag is arbitrary text that gets spliced into a pattern, so
            // every RegExp metacharacter in it has to be neutralised, not
            // just parentheses: an unescaped "." would match any flag and
            // an unescaped "+" or "[" would not even compile.
            const escaped = flag.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
            parts.push(`(${escaped})${quantifierChar(quantifier)}`);
        }
        /** Matches the flag string of a complete compound. */
        this.regex = re `/^${parts.join("")}$/`;
        // Nest the parts as `p1(p2(p3)?)?` so that any leading run of the
        // rule is accepted. `reduceRight` without an initial value throws on
        // an empty array, so an empty rule is special-cased; `match` rejects
        // everything for such a rule anyway.
        const partialSource = parts.length === 0
            ? ""
            : parts.reduceRight((acc, cur) => `${cur}(${acc})?`);
        /** Matches the flag string of a compound that may still continue. */
        this.partialRegex = re `/^${partialSource}$/`;
    }
    /**
     * Determines if a {@link FlagSet} matches this rule.
     *
     * @param flags - The {@link FlagSet} to check.
     * @param partial - If true, the {@link FlagSet} will only need to
     * partially match the rule to be considered valid. This is so that a
     * compound word can be checked for if it *can* continue in some way.
     * @returns `true` if at least one way of picking one flag per entry of
     * `flags` spells out a string accepted by the rule.
     */
    match(flags, partial = false) {
        // empty rule set
        if (this.rules.length === 0)
            return false;
        // Only flags that occur in the rule can take part in a match, so
        // narrow every entry down to those before building permutations.
        const relevant = Array.from(flags, f => intersect(this.flags, f));
        // Nothing to check, or some entry has no flag the rule knows about:
        // no permutation can satisfy the rule then.
        if (relevant.length === 0 || relevant.some(set => set.size === 0))
            return false;
        const regex = partial ? this.partialRegex : this.regex;
        return iterate(product(...relevant)).some(fc => regex.test(fc.join("")));
    }
}
/**
 * Parses the source for a {@link CompoundRule}.
 *
 * The rule is read left to right as a sequence of flags, each optionally
 * followed by a `?` or `*` quantifier. A flag written in parentheses is
 * taken verbatim, whatever its length. Outside of parentheses the extent
 * of a flag depends on `aff.FLAG`:
 *
 * - `"long"`: two characters.
 * - `"numeric"`: a run of decimal digits. Characters that cannot start a
 *   number (separators such as `,`) are skipped.
 * - anything else (`"short"`, `"UTF-8"`): a single character.
 *
 * @param text - The rule to parse.
 * @param aff - The {@link Aff} used for flag parsing.
 * @returns The `{ flag, quantifier }` entries in the order they appear.
 */
function parseCompoundRule(text, aff) {
    const rules = [];
    let i = 0;
    while (i < text.length) {
        let flag;
        if (text[i] === "(") {
            // Parenthesised flag: everything up to the closing parenthesis,
            // or up to the end of the text if it is never closed.
            const close = text.indexOf(")", i + 1);
            const end = close === -1 ? text.length : close;
            flag = text.slice(i + 1, end);
            i = end + 1; // continue after the closing parenthesis
        }
        else if (aff.FLAG === "long") {
            // Long flags are two characters wide.
            flag = text.slice(i, i + 2);
            i += 2;
        }
        else if (aff.FLAG === "numeric") {
            const start = i;
            while (i < text.length && text[i] >= "0" && text[i] <= "9")
                i++;
            if (i === start) {
                // Not a digit: step over it so that the scan always makes
                // progress instead of revisiting the same character.
                i++;
                continue;
            }
            flag = text.slice(start, i);
        }
        else {
            // "short", "UTF-8" and unrecognised flag types: one character.
            flag = text[i];
            i++;
        }
        // `i` now sits right behind the flag, where a quantifier may follow.
        let quantifier = Quantifier.ONE;
        if (text[i] === "?") {
            quantifier = Quantifier.ZERO_OR_ONE;
            i++;
        }
        else if (text[i] === "*") {
            quantifier = Quantifier.ZERO_OR_MORE;
            i++;
        }
        rules.push({ flag, quantifier });
    }
    return rules;
}
/**
 * Gives the character that denotes a {@link Quantifier}: `"*"` for
 * `ZERO_OR_MORE`, `"?"` for `ZERO_OR_ONE` and the empty string for any
 * other value.
 */
function quantifierChar(quantifier) {
    if (quantifier === Quantifier.ZERO_OR_MORE) {
        return "*";
    }
    return quantifier === Quantifier.ZERO_OR_ONE ? "?" : "";
}
//# sourceMappingURL=compound-rule.js.map