@dcoffey/espells
Version:
Pure JS/TS spellchecker, using Hunspell dictionaries. Based on Spylls.
145 lines • 5.84 kB
JavaScript
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */
/**
 * Shared constants: parser `RegExp`s, name synonyms and the numeric limits
 * applied while generating suggestions.
 */
export const CONSTANTS = {
    /**
     * A record of deprecated names that map to their proper name. Used in
     * the {@link Aff} `.aff` parser.
     */
    SYNONYMS: {
        PSEUDOROOT: "NEEDAFFIX",
        COMPOUNDLAST: "COMPOUNDEND"
    },
    /**
     * `RegExp` used to split a string of flags in "long" format, i.e. each
     * flag is two characters.
     *
     * NOTE(review): the second capture group is repeated, so after a match
     * it only holds its *last* repetition. For an input with more than two
     * flags the capture groups alone do not expose every pair — confirm
     * that the call sites account for this.
     */
    FLAG_LONG_REGEX: /(..)(..)*/,
    /**
     * `RegExp` used to parse phoneme table rules.
     *
     * Groups:
     *
     * 1. Letters
     * 2. Optional (letters wrapped in parentheses)
     * 3. Lookahead (one or more `-`)
     * 4. Flags (any run of `^`, `$` and `<`)
     * 5. Priority (a single digit)
     */
    PHONET_RULE_REGEX: /^(\p{L}+)(?:\((\p{L}+)\))?(-+)?([\^$<]*)(\d)?$/u,
    /**
     * `RegExp` used by the {@link Dic} `.dic` parser to determine if a line
     * should be skipped. It matches lines that begin with digits followed by
     * whitespace (or nothing else), lines that begin with a tab, and lines
     * that are empty or whitespace-only.
     */
    DIC_SKIP_REGEX: /^\d+(\s+|$)|^\t|^\s*$/u,
    /**
     * `RegExp` used to split a `.dic` "word" into its various components. Groups:
     *
     * 1. Stem
     * 2. Flags
     * 3. Data (not split here, see {@link CONSTANTS.SPLIT_DATA_REGEX})
     */
    SPLIT_WORD_REGEX: /^(.+?)(?:\/([\S]*?))?(?:(?:\t|(?:\s(?=[^\/]*?:.)))(.+))?$/u,
    /**
     * `RegExp` used to split a `.dic` word data key-value. Groups:
     *
     * 1. Key
     * 2. Value
     */
    SPLIT_DATA_REGEX: /(\S+):(\S+)/,
    /** Maximum number of {@link PhonetTable} suggestions per list of suggestions. */
    MAX_PHONET_SUGGESTIONS: 2,
    /**
     * Maximum number of suggestions generated when yielding permutations of
     * a misspelling.
     */
    MAX_SUGGESTIONS: 15,
    /** Maximum number of permutations generated per permutation type. */
    MAX_PERMUTATIONS: 500,
    /** Maximum number of ngram "roots" (most similar words to a misspelling). */
    NGRAM_MAX_ROOTS: 100,
    /** Maximum number of ngram guesses that can be processed. */
    NGRAM_MAX_GUESSES: 200,
    /**
     * Maximum number of {@link PhonetTable} "roots" (most similar words to a
     * misspelling).
     */
    PHONET_MAX_ROOTS: 100,
    /**
     * Maximum distance a character can be moved from its original position
     * when making permutations of a word.
     */
    MAX_CHAR_DISTANCE: 4,
    /**
     * `RegExp` used to split a string `RegExp`. Used in the {@link re} function.
     *
     * Groups:
     *
     * 1. Everything before the first delimiting `/`
     * 2. The text between the delimiting slashes (at least one character)
     * 3. Everything after the last `/`
     */
    SPLIT_REGEX_REGEX: /^([^]*)\/([^]+)\/([^]*)$/u,
    /**
     * `RegExp` used to split an {@link Affix} condition into parts: each
     * part is either one bracketed group (`[...]`) or one single character.
     *
     * The bracketed group is matched lazily so that consecutive groups such
     * as `[ab][cd]` are returned as two separate parts instead of being
     * merged into one greedy match.
     */
    SPLIT_CONDITION_REGEX: /(\[.+?\]|[^\[])/gu,
    /**
     * The default set of `RegExp`s used when breaking apart multiple words
     * from a single string with {@link breakWord}. In order: a `-` between
     * other characters, a leading `-`, and a trailing `-`.
     */
    DEFAULT_BREAK: new Set([/(?!^)-(?=.)/g, /^-/g, /-$/g]),
    /**
     * `RegExp` used to match a line that is just a number (digits with an
     * optional decimal part). Used in the `.dic` parser.
     */
    NUMBER_REGEX: /^\d+(\.\d+)?$/,
    /** `RegExp` used to split a line based on whitespace. */
    SPLIT_LINE_REGEX: /\s+/u
};
/**
 * Shared `TextDecoder` instance. The `"utf-8"` label is spelled out for
 * readability; it is the same encoding the constructor uses by default.
 */
export const decoder = new TextDecoder("utf-8");
/**
 * The various capitalization types a word can have.
 *
 * The object maps in both directions: each name resolves to its numeric
 * value and each numeric value resolves back to its name.
 */
// Kept as a hoisted `var` binding, exactly like the original declaration.
export var CapType = CapType || {};
for (const [value, name] of [
    /** All lowercase. */
    "NO",
    /** Titlecase. */
    "INIT",
    /** All uppercase. */
    "ALL",
    /** Mixed capitalization. */
    "HUH",
    /** Mixed capitalization, first letter is capitalized. */
    "HUHINIT"
].entries()) {
    // Forward (name -> value) and reverse (value -> name) entries.
    CapType[name] = value;
    CapType[value] = name;
}
/**
 * The various positions a word in a compound word could be in.
 *
 * The object maps in both directions: each name resolves to its numeric
 * value and each numeric value resolves back to its name.
 */
// Kept as a hoisted `var` binding, exactly like the original declaration.
export var CompoundPos = CompoundPos || {};
for (const [value, name] of [
    /** The compound segment is at the beginning of the word. */
    "BEGIN",
    /** The compound segment is somewhere in the middle of the word. */
    "MIDDLE",
    /** The compound segment is at the end of the word. */
    "END"
].entries()) {
    // Forward (name -> value) and reverse (value -> name) entries.
    CompoundPos[name] = value;
    CompoundPos[value] = name;
}
/**
 * Kinds of suggestions, based on how they were attained.
 *
 * The object maps in both directions: each name resolves to its numeric
 * value and each numeric value resolves back to its name. A member's
 * numeric value is its position in the list below, so the order matters.
 */
// Kept as a hoisted `var` binding, exactly like the original declaration.
export var SuggestionKind = SuggestionKind || {};
[
    "CASE",
    "FORCEUCASE",
    "UPPERCASE",
    "REPLCHARS",
    "SPACEWORD",
    "MAPCHARS",
    "SWAPCHAR",
    "LONGSWAPCHAR",
    "BADCHARKEY",
    "EXTRACHAR",
    "FORGOTCHAR",
    "MOVECHAR",
    "BADCHAR",
    "DOUBLETWOCHARS",
    "TWOWORDS",
    "DASHES",
    "NGRAM",
    "PHONET"
].forEach((kind, ordinal) => {
    // Forward (name -> value) and reverse (value -> name) entries.
    SuggestionKind[kind] = ordinal;
    SuggestionKind[ordinal] = kind;
});
/**
 * Suggestion kinds treated as "good" edits: if one of these edits to a
 * misspelling results in a correct word, that suggestion is almost
 * certainly what the misspelling was supposed to be, and thus further
 * suggestions shouldn't be generated.
 */
export const GOOD_EDITS = ["SPACEWORD", "REPLCHARS", "UPPERCASE"].map(
    // Resolve each member name to its numeric `SuggestionKind` value.
    (kindName) => SuggestionKind[kindName]
);
//# sourceMappingURL=constants.js.map