hunspell-reader
Version:
A library for reading Hunspell Dictionary Files
228 lines • 7.82 kB
TypeScript
import type { AffInfo } from './affDef.js';
import { Converter } from './converter.js';
/**
* An `AffixWord` whose `word` field has been converted using the OCONV mapping.
*/
export interface ConvertedAffixWord extends AffixWord {
/** NOTE(review): presumably the value of `word` before the OCONV mapping was applied — confirm against the implementation. */
originalWord: string;
}
/**
* Applies the rules of a Hunspell aff file to dictionary lines and words.
*
* Constructed from an `AffInfo` and a filename; exposes the input and output
* converters through `iConv` and `oConv`.
*/
export declare class Aff {
#private;
/** Affix information. NOTE(review): presumably the `affInfo` constructor argument — confirm. */
readonly affInfo: AffInfo;
/** Rule data available to subclasses. */
protected affData: AffData;
/** Converter returned by `oConv` (NOTE(review): presumably — confirm against the implementation). */
protected _oConv: Converter;
/** Converter returned by `iConv` (NOTE(review): presumably — confirm against the implementation). */
protected _iConv: Converter;
/** NOTE(review): presumably the backing field for the `maxSuffixDepth` accessor — confirm. */
private _maxSuffixDepth;
/**
* @param affInfo - the affix information to apply.
* @param filename - NOTE(review): presumably the name of the aff file `affInfo` was read from — confirm.
*/
constructor(affInfo: AffInfo, filename: string);
/** Maximum suffix depth (read). */
get maxSuffixDepth(): number;
/** Maximum suffix depth (write). */
set maxSuffixDepth(value: number);
/**
* Takes a line from a hunspell.dic file and applies the rules found in the aff file.
* For performance reasons, only the `word` field is mapped with OCONV.
* @param line - the line from the .dic file.
* @param maxDepth - optional depth limit. NOTE(review): presumably falls back to `maxSuffixDepth` when omitted — confirm.
* @returns the words generated for the line, each carrying its `originalWord`.
*/
applyRulesToDicEntry(line: string, maxDepth?: number): ConvertedAffixWord[];
/**
* @internal
*/
applyRulesToWord(affWord: AffixWord, remainingDepth: number): AffixWord[];
/**
* NOTE(review): presumably applies the affix rules attached to `affWord` while `remainingDepth`
* allows — the exact depth semantics are not visible in this declaration; confirm.
*/
applyAffixesToWord(affWord: AffixWord, remainingDepth: number): AffixWord[];
/**
* Applies a single prefix/suffix rule to a word.
* @param rule - the prefix or suffix rule to apply.
* @param affWord - the word the rule is applied to.
* @param combinableSfx - suffix rules. NOTE(review): presumably the ones that may be combined with `rule` — confirm.
*/
applyAffixToWord(rule: FxRule, affWord: AffixWord, combinableSfx: FxRule[]): AffixWord[];
/**
* Looks up rules for a flags string.
* @param flags - NOTE(review): presumably the flags portion of a dictionary entry (`word/FLAGS`) — confirm.
*/
getMatchingRules(flags: string): AffRule[];
/**
* Convert the applied rule indexes to AFF Letters.
* Requires that the affixWord was generated with trace mode turned on.
* @param affixWord - the generated AffixWord.
* @returns the flag values, or `undefined` (NOTE(review): presumably when no applied rules were recorded — confirm).
*/
getFlagsValuesForAffixWord(affixWord: AffixWord): string[] | undefined;
/** Converter for input. NOTE(review): presumably built from the aff file's ICONV entries — confirm. */
get iConv(): Converter;
/** Converter for output; per the docs above, OCONV is what gets applied to the `word` field. */
get oConv(): Converter;
/** Turns trace mode on or off. Trace mode is required by `getFlagsValuesForAffixWord`. */
setTraceMode(value: boolean): void;
}
/**
* Compares two `AffixWord`s and returns a number.
* NOTE(review): presumably follows the usual sort-comparator convention (negative / zero / positive);
* which fields are compared is not visible in this declaration — confirm.
*/
export declare function compareAff(a: AffixWord, b: AffixWord): number;
/**
* Flags attached to words and rules, each documented with the Hunspell option it represents.
*
* Every member other than `none` is a distinct power of two, so several flags fit in a single
* number and can be combined or tested with bitwise operators.
*/
export declare enum AffixFlags {
/** No flags set. */
none = 0,
/**
* COMPOUNDFLAG flag
*
* Words signed with COMPOUNDFLAG may be in compound words (except when word shorter than COMPOUNDMIN).
* Affixes with COMPOUNDFLAG also permits compounding of affixed words.
*
*/
isCompoundPermitted = 1,
/**
* COMPOUNDBEGIN flag
*
* Words signed with COMPOUNDBEGIN (or with a signed affix) may be first elements in compound words.
*
*/
canBeCompoundBegin = 2,// default false
/**
* COMPOUNDMIDDLE flag
*
* Words signed with COMPOUNDMIDDLE (or with a signed affix) may be middle elements in compound words.
*
*/
canBeCompoundMiddle = 4,// default false
/**
* COMPOUNDLAST flag
*
* Words signed with COMPOUNDLAST (or with a signed affix) may be last elements in compound words.
*
*/
canBeCompoundEnd = 8,// default false
/**
* COMPOUNDPERMITFLAG flag
*
* Prefixes are allowed at the beginning of compounds, suffixes are allowed at the end of compounds by default.
* Affixes with COMPOUNDPERMITFLAG may be inside of compounds.
*
* NOTE(review): the member name suggests Hunspell's ONLYINCOMPOUND option, while this description is the
* one for COMPOUNDPERMITFLAG. Confirm which option actually sets this bit.
*/
isOnlyAllowedInCompound = 16,
/**
* COMPOUNDFORBIDFLAG flag
*
* Suffixes with this flag forbid compounding of the affixed word.
*
*/
isCompoundForbidden = 32,
/**
* WARN flag
*
* This flag is for rare words, which are also often spelling mistakes, see option -r of command line Hunspell and FORBIDWARN.
*/
isWarning = 64,
/**
* KEEPCASE flag
*
* Forbid uppercased and capitalized forms of words signed with KEEPCASE flags. Useful for special orthographies (measurements and
* currency often keep their case in uppercased texts) and writing systems (e.g. keeping lower case of IPA characters). Also valuable
* for words erroneously written in the wrong case.
*/
isKeepCase = 128,
/**
* FORCEUCASE flag
*
* Last word part of a compound with flag FORCEUCASE forces capitalization of the whole compound word.
* E.g. the Dutch word "straat" (street) with FORCEUCASE flags will be allowed only in capitalized compound forms,
* according to the Dutch spelling rules for proper names.
*/
isForceUCase = 256,
/**
* FORBIDDENWORD flag
*
* This flag marks a forbidden word form. Because affixed forms are also forbidden, we can subtract a subset from the set of
* accepted affixed and compound words. Note: useful to forbid erroneous words generated by the compounding mechanism.
*/
isForbiddenWord = 512,
/**
* NOSUGGEST flag
*
* Words signed with NOSUGGEST flag are not suggested (but still accepted when typed correctly). Proposed flag for vulgar
* and obscene words (see also SUBSTANDARD).
*/
isNoSuggest = 1024,
/**
* NEEDAFFIX flag
*
* This flag marks virtual stems in the dictionary, words only valid when affixed. Except, if the dictionary word has a homonym
* or a zero affix. NEEDAFFIX works also with prefixes and prefix + suffix combinations (see tests/pseudoroot5.*).
*/
isNeedAffix = 2048
}
/** Numeric index identifying a rule. NOTE(review): presumably a position in `AffData.rules` — confirm. */
type RuleIdx = number;
/** A single flag; used as the key of `AffData.mapToRuleIdx`. */
type SingleFlag = string;
/** A flags string that `AffData.getRules` / `AffData.getRuleIndexes` resolve to rules or rule indexes. */
type WordFlags = string;
/** A line of a dictionary file, as accepted by `AffData.dictLineToEntry`. */
type DictionaryLine = string;
/**
* A dictionary line of the form `word/FLAGS`, split into its parts.
*/
interface DictionaryEntry {
/** The word part of the entry (NOTE(review): presumably everything before the `/` — confirm). */
word: string;
/** flags are the part after the `/`, `word/FLAGS` */
flags: string;
/** The original dictionary line. */
line: string;
}
/**
* Provenance carried by an `AffixWord`: the dictionary entry it came from and,
* when trace mode is on, the rules that were applied.
*/
export interface AffixWordSource {
/** Original dictionary entry */
dict: DictionaryEntry;
/**
* Optional applied rules, trace mode must be turned on.
* NOTE(review): the numbers are presumably `RuleIdx` values (see `Aff.getFlagsValuesForAffixWord`) — confirm.
*/
appliedRules?: number[] | undefined;
}
/**
* A word together with the rules still to apply to it, its flags, and its source.
*/
export interface AffixWord extends AffixWordSource {
/** The word */
word: string;
/**
* Rules to apply
*
* NOTE(review): typed as `FxRule[]`, whereas `AffData.toAffixWord` accepts `AffRule[] | undefined`
* for its `rules` argument — confirm whether flag rules are filtered out or the types should agree.
*/
rules: FxRule[] | undefined;
/** Flags */
flags: AffixFlags;
}
/**
* Rule tables and lookup helpers built from an `AffInfo`.
* Not exported from this module; `Aff` holds an instance in its protected `affData` field.
*/
declare class AffData {
#private;
/** NOTE(review): presumably the `affInfo` constructor argument — confirm. */
private affInfo;
/** NOTE(review): presumably the `filename` constructor argument — confirm. */
readonly filename: string;
/** All rules. NOTE(review): presumably indexed by `RuleIdx` — confirm. */
rules: AffRule[];
/** Maps a single flag to one rule index or to several. */
mapToRuleIdx: Map<SingleFlag, RuleIdx | RuleIdx[]>;
/** Maps a word's flags string to rule indexes. NOTE(review): presumably a cache for `getRuleIndexes` — confirm. */
mapWordRulesToRuleIndexes: Map<WordFlags, RuleIdx[]>;
/** Maps a word's flags string to rules. NOTE(review): presumably a cache for `getRules` — confirm. */
mapWordRulesToRules: Map<WordFlags, AffRule[]>;
/** Flag encoding; the same union as the `AffFlagType` alias declared in this file. */
affFlagType: 'long' | 'num' | 'char';
/** NOTE(review): presumably flags that were referenced but have no matching rule — confirm. */
missingFlags: Set<string>;
private _mapRuleIdxToRules;
/** Trace mode switch. NOTE(review): presumably what `Aff.setTraceMode` toggles — confirm. */
trace: boolean;
/**
* @param affInfo - the affix information to build the rule tables from.
* @param filename - NOTE(review): presumably the name of the aff file — confirm.
*/
constructor(affInfo: AffInfo, filename: string);
/** Converts a dictionary line into a `DictionaryEntry` (`word`, `flags`, original `line`). */
dictLineToEntry(line: DictionaryLine): DictionaryEntry;
/** Converts a dictionary line into an `AffixWord`. */
dictLineToAffixWord(line: DictionaryLine): AffixWord;
/**
* Builds an `AffixWord` from a source plus the given word, flags and rules.
* @param source - the source (or an existing `AffixWord`) the new word derives from.
* @param word - the word text.
* @param flags - the flags for the new word.
* @param rules - the rules for the new word, or `undefined`.
*/
toAffixWord(source: AffixWordSource | AffixWord, word: string, flags: AffixFlags, rules: AffRule[] | undefined): AffixWord;
/** Returns the rules for a word's flags string. */
getRules(rules: WordFlags): AffRule[];
/** Returns the rule indexes for a word's flags string. */
getRuleIndexes(rules: WordFlags): RuleIdx[];
/** Derives a single `AffixFlags` value from a list of rules. */
rulesToFlags(rules: AffRule[]): AffixFlags;
/**
* Resolves rule indexes to rules. The overloads make the result mirror the input:
* `undefined` in gives `undefined` out, an array in gives an array out.
*/
getRulesForIndexes(indexes: undefined): undefined;
getRulesForIndexes(indexes: RuleIdx[]): AffRule[];
getRulesForIndexes(indexes: RuleIdx[] | undefined): AffRule[] | undefined;
/**
* Returns the rules for a substitution, or `undefined`.
* NOTE(review): presumably resolves `sub.attachRules` — confirm.
*/
getRulesForAffSubstitution(sub: AffSubstitution): AffRule[] | undefined;
}
/** Affix kind: `'P'` is the tag used by `PfxRule`, `'S'` the tag used by `SfxRule`. */
type AffType = 'P' | 'S';
/**
* A prefix or suffix definition; referenced from `PfxRule.fx` and `SfxRule.fx`.
*/
interface AffFx {
/** Whether this is a prefix (`'P'`) or suffix (`'S'`) definition. */
type: AffType;
/** Identifier. NOTE(review): presumably the affix flag from the aff file — confirm. */
id: string;
/** NOTE(review): presumably whether this affix may be combined with affixes of the other kind — confirm. */
combinable: boolean;
/** The substitutions of this affix, grouped per match expression. */
substitutionsForRegExps: AffSubstitutionsForRegExp[];
}
/**
* A single substitution belonging to a prefix or suffix definition.
*/
interface AffSubstitution {
/** Whether the substitution belongs to a prefix (`'P'`) or a suffix (`'S'`). */
type: AffType;
/** NOTE(review): presumably the text stripped from the word before attaching — confirm. */
remove: string;
/** NOTE(review): presumably the text added to the word — confirm. */
attach: string;
/** Optional rule indexes. NOTE(review): presumably rules that apply to the resulting word — confirm. */
attachRules?: RuleIdx[] | undefined;
/** NOTE(review): presumably the expression used to perform the `remove` step on the word — confirm. */
replace: RegExp;
}
/**
* The substitutions that share one `match` expression.
*/
interface AffSubstitutionsForRegExp {
/** NOTE(review): presumably tested against the word to decide whether these substitutions apply — confirm. */
match: RegExp;
/**
* Substitutions grouped under a RegExp key. A JavaScript `Map` compares object keys by identity,
* so a lookup needs the same RegExp instance that was used as the key.
* NOTE(review): the name says "grouped by remove", but `AffSubstitution.remove` is a string while the key
* here is a RegExp — presumably the key corresponds to `AffSubstitution.replace`; confirm.
*/
substitutionsGroupedByRemove: Map<RegExp, AffSubstitution[]>;
}
/** Any rule: a flag rule or a prefix/suffix rule. */
type AffRule = FlagRule | FxRule;
/** A rule that carries an affix definition (`fx`): prefix or suffix. */
type FxRule = PfxRule | SfxRule;
/** Rule discriminator: `'S'` for `SfxRule`, `'P'` for `PfxRule`, `'F'` for `FlagRule`. */
type RuleType = 'S' | 'P' | 'F';
/**
* Fields shared by every rule kind (`FlagRule`, `PfxRule`, `SfxRule`).
*/
interface RuleBase {
/** Rule identifier. NOTE(review): presumably the flag value from the aff file — confirm. */
id: string;
/** NOTE(review): presumably the rule's position in `AffData.rules` — confirm. */
idx: number;
/** Discriminates the rule kind; narrowed to a single literal by each extending interface. */
type: RuleType;
/** Flags associated with the rule. */
flags: AffixFlags;
/**
* NOTE(review): `PfxRule` and `SfxRule` declare a required `fx: AffFx`; this optional `px` is not
* redeclared by any interface in this file. Confirm whether it is intentional or a misspelling of `fx`.
*/
px?: AffFx;
}
/**
* A rule tagged `'F'`: it declares no `fx` affix definition, only flags.
*/
interface FlagRule extends RuleBase {
type: 'F';
/** Redeclared with the same type as `RuleBase.flags`. */
flags: AffixFlags;
}
/**
* A rule tagged `'P'` that carries an affix definition.
*/
interface PfxRule extends RuleBase {
type: 'P';
/** The affix definition (NOTE(review): presumably one whose own `type` is `'P'` — not enforced by this type). */
fx: AffFx;
}
/**
* A rule tagged `'S'` that carries an affix definition.
*/
interface SfxRule extends RuleBase {
type: 'S';
/** The affix definition (NOTE(review): presumably one whose own `type` is `'S'` — not enforced by this type). */
fx: AffFx;
}
/** The flag encodings returned by `toAffFlagType`; `AffData.affFlagType` uses the same three literals. */
type AffFlagType = 'long' | 'num' | 'char';
/**
* Converts the FLAG value from an aff file into an `AffFlagType`.
*
* @param FLAG - the FLAG value from the aff file
* @returns the AffFlagType or throws
* @throws NOTE(review): presumably when FLAG is not a recognised value; which type `undefined` maps to
* is not visible in this declaration — confirm against the implementation.
*/
export declare function toAffFlagType(FLAG: string | undefined): AffFlagType;
export {};
//# sourceMappingURL=aff.d.ts.map