UNPKG

hunspell-reader

Version:
325 lines 12.3 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.debug = exports.filterAff = exports.compareAff = exports.asAffWord = exports.flagsToString = exports.affWordToColoredString = exports.logAffWord = exports.processRules = exports.Aff = void 0; const GS = __importStar(require("gensequence")); const gensequence_1 = require("gensequence"); const util = __importStar(require("util")); const converter_1 = require("./converter"); const util_1 = require("./util"); const log = false; const DefaultMaxDepth = 5; class Aff { constructor(affInfo) { this.affInfo = affInfo; this._maxSuffixDepth = DefaultMaxDepth; this.rules = processRules(affInfo); this._iConv = new converter_1.Converter(affInfo.ICONV || []); this._oConv = new converter_1.Converter(affInfo.OCONV || []); } get maxSuffixDepth() { return this._maxSuffixDepth; } set maxSuffixDepth(value) { this._maxSuffixDepth = value; } /** * Takes a line from a hunspell.dic file and applies the rules found in the aff file. * For performance reasons, only the `word` field is mapped with OCONV. * @param {string} line - the line from the .dic file. */ applyRulesToDicEntry(line, maxDepth) { const maxSuffixDepth = maxDepth ?? this.maxSuffixDepth; const [lineLeft] = line.split(/\s+/, 1); const [word, rules = ''] = lineLeft.split('/', 2); const results = this.applyRulesToWord(asAffWord(word, rules), maxSuffixDepth).map((affWord) => ({ ...affWord, word: this._oConv.convert(affWord.word), })); results.sort(compareAff); const filtered = results.filter(filterAff()); return filtered; } /** * @internal */ applyRulesToWord(affWord, remainingDepth) { const compoundMin = this.affInfo.COMPOUNDMIN ?? 3; const { word, base, suffix, prefix, dic } = affWord; const allRules = this.getMatchingRules(affWord.rules); const { rulesApplied, flags } = allRules .filter((rule) => !!rule.flags) .reduce((acc, rule) => ({ rulesApplied: [acc.rulesApplied, rule.id].join(' '), flags: { ...acc.flags, ...rule.flags }, }), { rulesApplied: affWord.rulesApplied, flags: affWord.flags }); const rules = this.joinRules(allRules.filter((rule) => !rule.flags).map((rule) => rule.id)); const affixRules = allRules.map((rule) => rule.sfx || rule.pfx).filter(util_1.isDefined); const wordWithFlags = { word, flags, rulesApplied, rules: '', base, suffix, prefix, dic }; return [wordWithFlags, ...this.applyAffixesToWord(affixRules, { ...wordWithFlags, rules }, remainingDepth)] .filter(({ flags }) => !flags.isNeedAffix) .map((affWord) => adjustCompounding(affWord, compoundMin)) .map((affWord) => logAffWord(affWord, 'applyRulesToWord')); } applyAffixesToWord(affixRules, affWord, remainingDepth) { if (remainingDepth <= 0) { return []; } const combinableRules = affixRules .filter((rule) => rule.type === 'SFX') .filter((rule) => rule.combinable === true) .map(({ id }) => id); const combinableSfx = this.joinRules(combinableRules); const r = affixRules .map((affix) => this.applyAffixToWord(affix, affWord, combinableSfx)) .reduce((a, b) => a.concat(b), []) .map((affWord) => this.applyRulesToWord(affWord, remainingDepth - 1)) .reduce((a, b) => a.concat(b), []); return r; } applyAffixToWord(affix, affWord, combinableSfx) { const { word } = affWord; const combineRules = affix.type === 'PFX' && affix.combinable && !!combinableSfx ? combinableSfx : ''; const flags = affWord.flags.isNeedAffix ? removeNeedAffix(affWord.flags) : affWord.flags; const matchingSubstitutions = [...affix.substitutionSets.values()].filter((sub) => sub.match.test(word)); const partialAffWord = { ...affWord, flags, rules: combineRules }; return matchingSubstitutions .map((sub) => sub.substitutions) .reduce((a, b) => a.concat(b), []) .filter((sub) => sub.remove === '0' || sub.replace.test(word)) .map((sub) => this.substitute(affix, partialAffWord, sub)) .map((affWord) => logAffWord(affWord, 'applyAffixToWord')); } substitute(affix, affWord, sub) { const { word: origWord, rulesApplied, flags, dic } = affWord; const rules = affWord.rules + (sub.attachRules || ''); const word = origWord.replace(sub.replace, sub.attach); const stripped = origWord.replace(sub.replace, ''); let p = affWord.prefix.length; let s = origWord.length - affWord.suffix.length; if (affix.type === 'SFX') { s = Math.min(stripped.length, s); p = Math.min(p, s); } else { const d = word.length - origWord.length; p = Math.max(p, word.length - stripped.length); s = Math.max(s + d, p); } const base = word.slice(p, s); const prefix = word.slice(0, p); const suffix = word.slice(s); return { word, rulesApplied: rulesApplied + ' ' + affix.id, rules, flags, base, suffix, prefix, dic, }; } getMatchingRules(rules) { const { AF = [] } = this.affInfo; const idx = parseInt(rules, 10); const rulesToSplit = AF[idx] || rules; return this.separateRules(rulesToSplit) .map((key) => this.rules.get(key)) .filter(util_1.isDefined); } joinRules(rules) { switch (this.affInfo.FLAG) { case 'long': return rules.join(''); case 'num': return rules.join(','); } return rules.join(''); } separateRules(rules) { switch (this.affInfo.FLAG) { case 'long': return [...new Set(rules.replace(/(..)/g, '$1//').split('//').slice(0, -1))]; case 'num': return [...new Set(rules.split(','))]; } return [...new Set(rules.split(''))]; } get iConv() { return this._iConv; } get oConv() { return this._oConv; } } exports.Aff = Aff; function signature(aff) { const { word, flags } = aff; const sig = Object.entries(flags) .filter((e) => !!e[1]) .map((f) => flagToStringMap[f[0]]) .sort() .join(''); return word + '|' + sig; } function processRules(affInfo) { const sfxRules = (0, gensequence_1.genSequence)(affInfo.SFX || []) .map(([, sfx]) => sfx) .map((sfx) => ({ id: sfx.id, type: 'sfx', sfx })); const pfxRules = (0, gensequence_1.genSequence)(affInfo.PFX || []) .map(([, pfx]) => pfx) .map((pfx) => ({ id: pfx.id, type: 'pfx', pfx })); const flagRules = GS.sequenceFromObject(affInfo) .filter(([key, value]) => !!affFlag[key] && !!value) // eslint-disable-next-line @typescript-eslint/no-non-null-assertion .map(([key, value]) => ({ id: value, type: 'flag', flags: affFlag[key] })); const rules = sfxRules .concat(pfxRules) .concat(flagRules) .reduce((acc, rule) => { acc.set(rule.id, rule); return acc; }, new Map()); return rules; } exports.processRules = processRules; const affFlag = { KEEPCASE: { isKeepCase: true }, WARN: { isWarning: true }, FORCEUCASE: { isForceUCase: true }, FORBIDDENWORD: { isForbiddenWord: true }, NOSUGGEST: { isNoSuggest: true }, NEEDAFFIX: { isNeedAffix: true }, COMPOUNDBEGIN: { canBeCompoundBegin: true }, COMPOUNDMIDDLE: { canBeCompoundMiddle: true }, COMPOUNDEND: { canBeCompoundEnd: true }, COMPOUNDFLAG: { isCompoundPermitted: true }, COMPOUNDPERMITFLAG: { isCompoundPermitted: true }, COMPOUNDFORBIDFLAG: { isCompoundForbidden: true }, ONLYINCOMPOUND: { isOnlyAllowedInCompound: true }, }; const _FlagToStringMap = { isCompoundPermitted: 'C', canBeCompoundBegin: 'B', canBeCompoundMiddle: 'M', canBeCompoundEnd: 'E', isOnlyAllowedInCompound: 'O', isWarning: 'W', isKeepCase: 'K', isForceUCase: 'U', isForbiddenWord: 'F', isNoSuggest: 'N', isNeedAffix: 'A', isCompoundForbidden: '-', }; const _FlagToLongStringMap = { isCompoundPermitted: 'CompoundPermitted', canBeCompoundBegin: 'CompoundBegin', canBeCompoundMiddle: 'CompoundMiddle', canBeCompoundEnd: 'CompoundEnd', isOnlyAllowedInCompound: 'OnlyInCompound', isWarning: 'Warning', isKeepCase: 'KeepCase', isForceUCase: 'ForceUpperCase', isForbiddenWord: 'Forbidden', isNoSuggest: 'NoSuggest', isNeedAffix: 'NeedAffix', isCompoundForbidden: 'CompoundForbidden', }; const flagToStringMap = _FlagToStringMap; const flagToLongStringMap = _FlagToLongStringMap; function logAffWord(affWord, message) { /* istanbul ignore if */ if (log) { const dump = util.inspect(affWord, { showHidden: false, depth: 5, colors: true }); console.log(`${message}: ${dump}`); } return affWord; } exports.logAffWord = logAffWord; /* istanbul ignore next */ function affWordToColoredString(affWord) { return util .inspect({ ...affWord, flags: flagsToString(affWord.flags) }, { showHidden: false, depth: 5, colors: true }) .replace(/(\s|\n|\r)+/g, ' '); } exports.affWordToColoredString = affWordToColoredString; /* istanbul ignore next */ function flagsToString(flags) { return [...Object.entries(flags)] .filter(([, v]) => !!v) .map(([k]) => flagToLongStringMap[k]) .sort() .join(':'); } exports.flagsToString = flagsToString; function asAffWord(word, rules = '', flags = {}) { return { word, base: word, prefix: '', suffix: '', rulesApplied: '', rules, flags, dic: rules ? word + '/' + rules : word, }; } exports.asAffWord = asAffWord; function compareAff(a, b) { if (a.word !== b.word) { return a.word < b.word ? -1 : 1; } const sigA = signature(a); const sigB = signature(b); return sigA < sigB ? -1 : sigA > sigB ? 1 : 0; } exports.compareAff = compareAff; /** * Returns a filter function that will filter adjacent AffWords * It compares the word and the flags. */ function filterAff() { return (0, util_1.filterOrderedList)((a, b) => a.word !== b.word || signature(a) !== signature(b)); } exports.filterAff = filterAff; exports.debug = { signature, }; function removeNeedAffix(flags) { const newFlags = { ...flags }; delete newFlags.isNeedAffix; return newFlags; } function adjustCompounding(affWord, minLength) { if (!affWord.flags.isCompoundPermitted || affWord.word.length >= minLength) { return affWord; } const { isCompoundPermitted: _, ...flags } = affWord.flags; affWord.flags = flags; return affWord; } //# sourceMappingURL=aff.js.map