hunspell-reader
Version:
A library for reading Hunspell Dictionary Files
325 lines • 12.3 kB
JavaScript
;
// TypeScript interop helper: exposes property `k` of module `m` on object `o`
// under the name `k2` (defaults to `k`). Reuses an already-defined helper from
// `this` when one exists.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (o, m, k, k2) {
        const targetKey = k2 === undefined ? k : k2;
        let descriptor = Object.getOwnPropertyDescriptor(m, k);
        let needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            // Accessor on the source: only re-wrap it when the source is not an ES module.
            needsGetter = !m.__esModule;
        }
        else {
            // Data property that may still change: bind through a getter so reads stay live.
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, targetKey, descriptor);
    }
    : function (o, m, k, k2) {
        // Fallback when Object.create is unavailable: copy the current value.
        const targetKey = k2 === undefined ? k : k2;
        o[targetKey] = m[k];
    });
// TypeScript interop helper: attaches value `v` to object `o` as its `default`
// property. Reuses an already-defined helper from `this` when one exists.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (o, v) {
        const descriptor = { enumerable: true, value: v };
        Object.defineProperty(o, "default", descriptor);
    }
    : function (o, v) {
        // Fallback when Object.create is unavailable: plain assignment.
        o["default"] = v;
    });
// TypeScript interop helper behind `import * as x`: returns ES modules as-is;
// for anything else builds a fresh object holding the module's own enumerable
// properties (except `default`) and sets `default` to the module itself.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    const namespace = {};
    if (mod != null) {
        for (const key in mod) {
            const isOwn = Object.prototype.hasOwnProperty.call(mod, key);
            if (key === "default" || !isOwn) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.debug = exports.filterAff = exports.compareAff = exports.asAffWord = exports.flagsToString = exports.affWordToColoredString = exports.logAffWord = exports.processRules = exports.Aff = void 0;
const GS = __importStar(require("gensequence"));
const gensequence_1 = require("gensequence");
const util = __importStar(require("util"));
const converter_1 = require("./converter");
const util_1 = require("./util");
// Debug switch read by logAffWord: when true, every AffWord passed through it is dumped to the console.
const log = false;
// Initial value of Aff#maxSuffixDepth, the recursion budget used when no explicit depth is passed.
const DefaultMaxDepth = 5;
/**
 * Expands Hunspell dictionary entries by applying the prefix, suffix and flag
 * rules derived from an `affInfo` object.
 */
class Aff {
    /**
     * @param affInfo - affix description. This class reads its `ICONV`, `OCONV`,
     *   `COMPOUNDMIN`, `AF` and `FLAG` fields and hands the whole object to `processRules`.
     */
    constructor(affInfo) {
        this.affInfo = affInfo;
        this._maxSuffixDepth = DefaultMaxDepth;
        this.rules = processRules(affInfo);
        this._iConv = new converter_1.Converter(affInfo.ICONV || []);
        this._oConv = new converter_1.Converter(affInfo.OCONV || []);
    }
    /** Default recursion budget used by `applyRulesToDicEntry`. */
    get maxSuffixDepth() {
        return this._maxSuffixDepth;
    }
    set maxSuffixDepth(value) {
        this._maxSuffixDepth = value;
    }
    /**
     * Takes a line from a hunspell.dic file and applies the rules found in the aff file.
     * For performance reasons, only the `word` field is mapped with OCONV.
     * @param {string} line - the line from the .dic file.
     * @param {number} [maxDepth] - recursion budget; falls back to `maxSuffixDepth` when nullish.
     * @returns the generated AffWords, sorted with `compareAff` and with adjacent duplicates
     *   (same word and flag signature) removed.
     */
    applyRulesToDicEntry(line, maxDepth) {
        const maxSuffixDepth = maxDepth ?? this.maxSuffixDepth;
        // Only the first whitespace-delimited token of the line is used.
        const [lineLeft] = line.split(/\s+/, 1);
        const [word, rules = ''] = lineLeft.split('/', 2);
        const results = this.applyRulesToWord(asAffWord(word, rules), maxSuffixDepth).map((affWord) => ({
            ...affWord,
            word: this._oConv.convert(affWord.word),
        }));
        results.sort(compareAff);
        const filtered = results.filter(filterAff());
        return filtered;
    }
    /**
     * Applies every rule named in `affWord.rules` to the word.
     * Flag rules are merged into the word's flags; affix rules are expanded recursively.
     * Words still flagged `isNeedAffix` are dropped, and `adjustCompounding` is applied
     * with `COMPOUNDMIN` (default 3).
     * @param affWord - the word to expand.
     * @param {number} remainingDepth - how many more levels of affix application are allowed.
     * @returns the word itself (with merged flags) followed by all derived words.
     * @internal
     */
    applyRulesToWord(affWord, remainingDepth) {
        const compoundMin = this.affInfo.COMPOUNDMIN ?? 3;
        const { word, base, suffix, prefix, dic } = affWord;
        const allRules = this.getMatchingRules(affWord.rules);
        // Fold the flag-only rules into the accumulated flags and the rulesApplied trace.
        const { rulesApplied, flags } = allRules
            .filter((rule) => !!rule.flags)
            .reduce((acc, rule) => ({
            rulesApplied: [acc.rulesApplied, rule.id].join(' '),
            flags: { ...acc.flags, ...rule.flags },
        }), { rulesApplied: affWord.rulesApplied, flags: affWord.flags });
        // The remaining (non-flag) rule ids are re-encoded and carried into the affix expansion.
        const rules = this.joinRules(allRules.filter((rule) => !rule.flags).map((rule) => rule.id));
        const affixRules = allRules.map((rule) => rule.sfx || rule.pfx).filter(util_1.isDefined);
        const wordWithFlags = { word, flags, rulesApplied, rules: '', base, suffix, prefix, dic };
        return [wordWithFlags, ...this.applyAffixesToWord(affixRules, { ...wordWithFlags, rules }, remainingDepth)]
            .filter(({ flags }) => !flags.isNeedAffix)
            .map((affWord) => adjustCompounding(affWord, compoundMin))
            .map((affWord) => logAffWord(affWord, 'applyRulesToWord'));
    }
    /**
     * Applies each affix rule to the word, then recursively expands every result.
     * @param affixRules - the SFX/PFX rule objects to apply.
     * @param affWord - the word to apply them to.
     * @param {number} remainingDepth - recursion budget; nothing is generated once it reaches 0.
     * @returns all words derived through the given affixes.
     */
    applyAffixesToWord(affixRules, affWord, remainingDepth) {
        if (remainingDepth <= 0) {
            return [];
        }
        // Ids of combinable suffix rules; combinable prefixes pass these on to their results.
        const combinableRules = affixRules
            .filter((rule) => rule.type === 'SFX')
            .filter((rule) => rule.combinable === true)
            .map(({ id }) => id);
        const combinableSfx = this.joinRules(combinableRules);
        // All affixes are applied first, then every result is expanded (same order as two passes).
        return affixRules
            .flatMap((affix) => this.applyAffixToWord(affix, affWord, combinableSfx))
            .flatMap((derived) => this.applyRulesToWord(derived, remainingDepth - 1));
    }
    /**
     * Applies a single affix rule to a word.
     * @param affix - the SFX/PFX rule object.
     * @param affWord - the word to transform.
     * @param {string} combinableSfx - encoded ids of combinable suffix rules; attached to the
     *   results only when `affix` is a combinable prefix.
     * @returns one AffWord per matching substitution.
     */
    applyAffixToWord(affix, affWord, combinableSfx) {
        const { word } = affWord;
        const combineRules = affix.type === 'PFX' && affix.combinable && !!combinableSfx ? combinableSfx : '';
        // Receiving an affix satisfies NeedAffix, so the derived words drop that flag.
        const flags = affWord.flags.isNeedAffix ? removeNeedAffix(affWord.flags) : affWord.flags;
        const matchingSubstitutions = [...affix.substitutionSets.values()].filter((sub) => sub.match.test(word));
        const partialAffWord = { ...affWord, flags, rules: combineRules };
        return matchingSubstitutions
            .flatMap((sub) => sub.substitutions)
            .filter((sub) => sub.remove === '0' || sub.replace.test(word))
            .map((sub) => this.substitute(affix, partialAffWord, sub))
            .map((affWord) => logAffWord(affWord, 'applyAffixToWord'));
    }
    /**
     * Performs one substitution and recomputes the prefix / base / suffix split of the result.
     * @param affix - the rule the substitution belongs to (its `type` and `id` are used).
     * @param affWord - the word being transformed.
     * @param sub - the substitution (`replace`, `attach`, optional `attachRules`).
     * @returns the new AffWord.
     */
    substitute(affix, affWord, sub) {
        const { word: origWord, rulesApplied, flags, dic } = affWord;
        const rules = affWord.rules + (sub.attachRules || '');
        const word = origWord.replace(sub.replace, sub.attach);
        // The original word with the replaced part removed and nothing attached.
        const stripped = origWord.replace(sub.replace, '');
        // p: end index of the prefix; s: start index of the suffix.
        let p = affWord.prefix.length;
        let s = origWord.length - affWord.suffix.length;
        if (affix.type === 'SFX') {
            // The suffix starts no later than the end of the stripped word.
            s = Math.min(stripped.length, s);
            p = Math.min(p, s);
        }
        else {
            // d: how much the word length changed; the suffix boundary moves by the same amount.
            const d = word.length - origWord.length;
            p = Math.max(p, word.length - stripped.length);
            s = Math.max(s + d, p);
        }
        const base = word.slice(p, s);
        const prefix = word.slice(0, p);
        const suffix = word.slice(s);
        return {
            word,
            rulesApplied: rulesApplied + ' ' + affix.id,
            rules,
            flags,
            base,
            suffix,
            prefix,
            dic,
        };
    }
    /**
     * Resolves an encoded rule string to the known rule objects.
     * When `rules` parses as an index with an entry in `affInfo.AF`, that entry is used instead.
     * Unknown rule ids are skipped.
     * @param {string} rules - encoded rule ids.
     */
    getMatchingRules(rules) {
        const { AF = [] } = this.affInfo;
        const idx = parseInt(rules, 10);
        const rulesToSplit = AF[idx] || rules;
        return this.separateRules(rulesToSplit)
            .map((key) => this.rules.get(key))
            .filter(util_1.isDefined);
    }
    /**
     * Encodes a list of rule ids as a single string: comma separated for `FLAG num`,
     * plain concatenation otherwise.
     * @param {string[]} rules - rule ids.
     * @returns {string}
     */
    joinRules(rules) {
        return rules.join(this.affInfo.FLAG === 'num' ? ',' : '');
    }
    /**
     * Splits an encoded rule string into unique rule ids, according to `affInfo.FLAG`:
     * two-character ids for `long`, comma separated ids for `num`, single characters otherwise.
     * @param {string} rules - encoded rule ids.
     * @returns {string[]} ids in first-seen order, without duplicates.
     */
    separateRules(rules) {
        switch (this.affInfo.FLAG) {
            case 'long':
                // Take consecutive character pairs directly; a trailing unpaired character is ignored.
                return [...new Set(rules.match(/../g) ?? [])];
            case 'num':
                return [...new Set(rules.split(','))];
        }
        return [...new Set(rules.split(''))];
    }
    /** Converter built from `affInfo.ICONV`. */
    get iConv() {
        return this._iConv;
    }
    /** Converter built from `affInfo.OCONV`. */
    get oConv() {
        return this._oConv;
    }
}
exports.Aff = Aff;
/**
 * Builds the identity string of an AffWord: the word, a `|`, then the sorted
 * one-character codes (from `flagToStringMap`) of every truthy flag.
 */
function signature(aff) {
    const { word, flags } = aff;
    const codes = Object.keys(flags)
        .filter((name) => !!flags[name])
        .map((name) => flagToStringMap[name]);
    codes.sort();
    return word + '|' + codes.join('');
}
/**
 * Builds the rule lookup table for an affix description.
 * Every `SFX` entry, every `PFX` entry and every flag directive known to `affFlag`
 * (with a truthy value) becomes one rule, keyed by its id. Rules are inserted in
 * that order, so a later rule with the same id replaces an earlier one.
 * @param affInfo - the affix description.
 * @returns {Map} rule id -> rule.
 */
function processRules(affInfo) {
    const toSfxRule = ([, sfx]) => ({ id: sfx.id, type: 'sfx', sfx });
    const toPfxRule = ([, pfx]) => ({ id: pfx.id, type: 'pfx', pfx });
    const isKnownFlag = ([key, value]) => !!affFlag[key] && !!value;
    // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
    const toFlagRule = ([key, value]) => ({ id: value, type: 'flag', flags: affFlag[key] });
    const sfxRules = (0, gensequence_1.genSequence)(affInfo.SFX || []).map(toSfxRule);
    const pfxRules = (0, gensequence_1.genSequence)(affInfo.PFX || []).map(toPfxRule);
    const flagRules = GS.sequenceFromObject(affInfo).filter(isKnownFlag).map(toFlagRule);
    const byId = new Map();
    // Map#set returns the map itself, which makes it usable directly as the reducer result.
    return sfxRules
        .concat(pfxRules)
        .concat(flagRules)
        .reduce((acc, rule) => acc.set(rule.id, rule), byId);
}
exports.processRules = processRules;
// Maps an affix-description key (a flag directive name) to the AffWord flags it switches on.
// processRules uses it both to recognise flag directives and to build the flag rules.
// Note that COMPOUNDFLAG and COMPOUNDPERMITFLAG both set isCompoundPermitted.
const affFlag = {
KEEPCASE: { isKeepCase: true },
WARN: { isWarning: true },
FORCEUCASE: { isForceUCase: true },
FORBIDDENWORD: { isForbiddenWord: true },
NOSUGGEST: { isNoSuggest: true },
NEEDAFFIX: { isNeedAffix: true },
COMPOUNDBEGIN: { canBeCompoundBegin: true },
COMPOUNDMIDDLE: { canBeCompoundMiddle: true },
COMPOUNDEND: { canBeCompoundEnd: true },
COMPOUNDFLAG: { isCompoundPermitted: true },
COMPOUNDPERMITFLAG: { isCompoundPermitted: true },
COMPOUNDFORBIDFLAG: { isCompoundForbidden: true },
ONLYINCOMPOUND: { isOnlyAllowedInCompound: true },
};
// One-character code for each AffWord flag; used by signature() to build a compact flag string.
const _FlagToStringMap = {
isCompoundPermitted: 'C',
canBeCompoundBegin: 'B',
canBeCompoundMiddle: 'M',
canBeCompoundEnd: 'E',
isOnlyAllowedInCompound: 'O',
isWarning: 'W',
isKeepCase: 'K',
isForceUCase: 'U',
isForbiddenWord: 'F',
isNoSuggest: 'N',
isNeedAffix: 'A',
isCompoundForbidden: '-',
};
// Readable label for each AffWord flag; used by flagsToString() for display output.
const _FlagToLongStringMap = {
isCompoundPermitted: 'CompoundPermitted',
canBeCompoundBegin: 'CompoundBegin',
canBeCompoundMiddle: 'CompoundMiddle',
canBeCompoundEnd: 'CompoundEnd',
isOnlyAllowedInCompound: 'OnlyInCompound',
isWarning: 'Warning',
isKeepCase: 'KeepCase',
isForceUCase: 'ForceUpperCase',
isForbiddenWord: 'Forbidden',
isNoSuggest: 'NoSuggest',
isNeedAffix: 'NeedAffix',
isCompoundForbidden: 'CompoundForbidden',
};
// Aliases under which the two tables above are referenced by the rest of the module.
const flagToStringMap = _FlagToStringMap;
const flagToLongStringMap = _FlagToLongStringMap;
/**
 * Pass-through debugging aid: when the module-level `log` switch is on, prints
 * `message` followed by an inspected dump of `affWord` to the console.
 * @returns the same `affWord` it was given, so it can sit inside a `.map()` chain.
 */
function logAffWord(affWord, message) {
    /* istanbul ignore if */
    if (log) {
        const inspectOptions = { showHidden: false, depth: 5, colors: true };
        const dump = util.inspect(affWord, inspectOptions);
        console.log(`${message}: ${dump}`);
    }
    return affWord;
}
exports.logAffWord = logAffWord;
/* istanbul ignore next */
/**
 * Renders an AffWord as a single-line, colorized string for display.
 * The flags object is replaced by its `flagsToString` form before inspection,
 * and every run of whitespace in the output is collapsed to one space.
 */
function affWordToColoredString(affWord) {
    const printable = { ...affWord, flags: flagsToString(affWord.flags) };
    const rendered = util.inspect(printable, { showHidden: false, depth: 5, colors: true });
    return rendered.replace(/(\s|\n|\r)+/g, ' ');
}
exports.affWordToColoredString = affWordToColoredString;
/* istanbul ignore next */
/**
 * Converts a flags object to a readable string: the long labels (from
 * `flagToLongStringMap`) of all truthy flags, sorted and joined with `:`.
 */
function flagsToString(flags) {
    const labels = [];
    for (const [name, enabled] of Object.entries(flags)) {
        if (enabled) {
            labels.push(flagToLongStringMap[name]);
        }
    }
    labels.sort();
    return labels.join(':');
}
exports.flagsToString = flagsToString;
/**
 * Wraps a plain word in a fresh AffWord record with no prefix, no suffix and
 * no rules applied yet.
 * @param {string} word - the word; also used as the initial `base`.
 * @param {string} [rules] - encoded rule ids still to be applied.
 * @param {object} [flags] - initial flags.
 * @returns the AffWord; `dic` is `word/rules` when rules are given, else just the word.
 */
function asAffWord(word, rules = '', flags = {}) {
    let dic = word;
    if (rules) {
        dic = word + '/' + rules;
    }
    return { word, base: word, prefix: '', suffix: '', rulesApplied: '', rules, flags, dic };
}
exports.asAffWord = asAffWord;
/**
 * Sort comparator for AffWords: orders by `word` first and, for equal words,
 * by their `signature` string.
 * @returns {number} -1, 0 or 1.
 */
function compareAff(a, b) {
    const wordsDiffer = a.word !== b.word;
    if (wordsDiffer) {
        if (a.word < b.word) {
            return -1;
        }
        return 1;
    }
    const [sigA, sigB] = [signature(a), signature(b)];
    if (sigA < sigB) {
        return -1;
    }
    if (sigA > sigB) {
        return 1;
    }
    return 0;
}
exports.compareAff = compareAff;
/**
 * Creates a filter predicate for a sorted AffWord list that removes adjacent
 * duplicates. Two neighbours count as duplicates when both their word and
 * their flag signature are equal.
 */
function filterAff() {
    const differs = (prev, next) => !(prev.word === next.word && signature(prev) === signature(next));
    return (0, util_1.filterOrderedList)(differs);
}
exports.filterAff = filterAff;
// Exposes the otherwise module-private `signature` helper under `exports.debug`.
exports.debug = {
signature,
};
/**
 * Returns a shallow copy of `flags` without the `isNeedAffix` entry.
 * The object passed in is left unchanged.
 */
function removeNeedAffix(flags) {
    const { isNeedAffix: _dropped, ...remaining } = flags ?? {};
    return remaining;
}
/**
 * Withdraws compounding permission from words that are too short.
 * If `affWord` is flagged `isCompoundPermitted` but its word has fewer than
 * `minLength` UTF-16 code units, a copy without that flag is returned.
 * Otherwise the very same object is returned.
 *
 * The input is never modified: the previous version reassigned `affWord.flags`
 * on the caller's object.
 *
 * @param affWord - the word to check.
 * @param {number} minLength - minimum word length for which compounding stays permitted.
 * @returns the original `affWord`, or an adjusted shallow copy.
 */
function adjustCompounding(affWord, minLength) {
    if (!affWord.flags.isCompoundPermitted || affWord.word.length >= minLength) {
        return affWord;
    }
    const { isCompoundPermitted: _, ...flags } = affWord.flags;
    return { ...affWord, flags };
}
//# sourceMappingURL=aff.js.map