UNPKG

hunspell-reader

Version:
149 lines 5.74 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.affToDicInfo = void 0;
const sync_1 = require("@cspell/cspell-pipe/sync");
const textUtils_1 = require("./textUtils");

/** Number of character maps joined (with `|`) into a single cost-map entry. */
const MAPS_PER_COST_ENTRY = 6;

/**
 * Build dictionary information from parsed aff data.
 *
 * The result merges the extracted alphabet information with the suggestion edit
 * costs, then overrides `alphabet` and `accents` with the values returned by
 * `toRange` from ./textUtils.
 *
 * @param aff - parsed aff data. The fields read by this module are
 *   MAP, TRY, KEY, REP, ICONV, OCONV, PFX and SFX. `aff` is not modified.
 * @param locale - locale handed to `toLocaleLowerCase` / `toLocaleUpperCase`.
 * @returns an object with `locale`, `alphabet`, `accents` and `suggestionEditCosts`.
 */
function affToDicInfo(aff, locale) {
    const alphabetInfo = extractAlphabet(aff, locale);
    return {
        ...alphabetInfo,
        ...extractSuggestionEditCosts(aff, alphabetInfo),
        locale,
        // NOTE(review): the meaning of the second argument (5) is defined by `toRange` — confirm in ./textUtils.
        alphabet: (0, textUtils_1.toRange)(alphabetInfo.alphabet, 5),
        accents: (0, textUtils_1.toRange)([...alphabetInfo.accents].sort().join('')),
    };
}
exports.affToDicInfo = affToDicInfo;

/**
 * Collect the letters and accent marks that appear in the aff data.
 *
 * Every string found in the aff sources is normalized, split into characters
 * together with its locale lower-case and upper-case forms, trimmed, and
 * de-duplicated. Only characters matching `\p{L}` are kept for the alphabet;
 * the accents are the `\p{M}` characters of the NFD form of that alphabet.
 *
 * @param aff - parsed aff data.
 * @param locale - locale used for the case conversions.
 * @returns `{ locale, alphabet, accents }` where `alphabet` is a sorted string
 *   and `accents` is a `Set` of characters.
 */
function extractAlphabet(aff, locale) {
    const sources = [
        aff.MAP,
        aff.TRY,
        aff.KEY,
        aff.REP?.flatMap((rep) => [rep.match, rep.replaceWith]),
        aff.ICONV?.flatMap((cov) => [cov.from, cov.to]),
        aff.OCONV?.flatMap((cov) => [cov.from, cov.to]),
        extractFxLetters(aff.PFX),
        extractFxLetters(aff.SFX),
    ];
    const setOfLetters = new Set(sources
        .filter(isDefined)
        // Flatten one level: array sources are expanded, string sources are kept as-is.
        .flat()
        .map((a) => a.normalize())
        .flatMap((a) => [...a, ...a.toLocaleLowerCase(locale), ...a.toLocaleUpperCase(locale)])
        .map((a) => a.trim())
        .filter((a) => !!a));
    const alphabet = [...setOfLetters].sort().join('').replace(/\P{L}/gu, '');
    const accents = new Set(alphabet.normalize('NFD').replace(/\P{M}/gu, ''));
    return { locale, alphabet, accents };
}

/**
 * @param a - any value.
 * @returns `true` unless `a` is `undefined`.
 */
function isDefined(a) {
    return a !== undefined;
}

/**
 * Assemble all suggestion edit costs: capitalization / accent replacements
 * first, then the aff MAP entry, then the aff REP entries.
 *
 * @param aff - parsed aff data.
 * @param alphaInfo - result of `extractAlphabet`.
 * @returns `{ suggestionEditCosts }`.
 */
function extractSuggestionEditCosts(aff, alphaInfo) {
    const suggestionEditCosts = [
        ...calcCapsAndAccentReplacements(alphaInfo),
        ...calcAffMapReplacements(aff),
        ...calcAffRepReplacements(aff),
    ];
    return {
        suggestionEditCosts,
    };
}

/**
 * Turn `aff.MAP` into a single cost-map entry with a replace cost of 1.
 *
 * @param aff - parsed aff data.
 * @returns an empty array when `aff.MAP` is falsy, otherwise a one-element array.
 */
function calcAffMapReplacements(aff) {
    if (!aff.MAP)
        return [];
    // Sort a copy: Array.prototype.sort works in place and would reorder the caller's data.
    const map = [...aff.MAP].sort().join('|');
    return [{ map, replace: 1, description: 'Hunspell Aff Map' }];
}

/**
 * Turn `aff.REP` into cost-map entries with a replace cost of 75.
 *
 * @param aff - parsed aff data.
 * @returns an empty array when `aff.REP` is falsy, otherwise the grouped cost maps.
 */
function calcAffRepReplacements(aff) {
    if (!aff.REP)
        return [];
    const pairs = aff.REP.map((rep) => [rep.match, rep.replaceWith]);
    return createCostMaps(pairs, { map: '', replace: 75, description: 'Hunspell Replace Map' });
}

/**
 * Build the cost maps for capitalization changes, accent changes, and the
 * combination of both, derived from the alphabet.
 *
 * @param alphaInfo - object providing `locale` and `alphabet`.
 * @returns the cost-map entries in the order: capitalization, accents, both.
 */
function calcCapsAndAccentReplacements(alphaInfo) {
    const { locale, alphabet } = alphaInfo;
    const letters = [...alphabet];
    const capForms = letters.map((letter) => calcCapitalizationForms(letter, locale));
    const accentForms = calcAccentForms(letters);
    const mapCrossAccent = calcCrossAccentCapsMap(accentForms, locale);
    return [
        ...createCostMaps(capForms, { map: '', replace: 1, description: 'Capitalization change.' }),
        ...createCostMaps(accentForms, { map: '', replace: 1, description: 'Replace Accents' }),
        ...createCostMaps(mapCrossAccent, { map: '', replace: 2, description: 'Capitalization and Accent change.' }),
    ];
}

/**
 * Convert collections of interchangeable strings into cost-map entries.
 *
 * Each collection is rendered with `joinCharMap`; the rendered strings are
 * de-duplicated, sorted, stripped of empty values and then joined with `|`
 * in groups of `MAPS_PER_COST_ENTRY`.
 *
 * @param formMaps - array of iterables of strings.
 * @param base - template entry; it is copied and its `map` is replaced.
 * @returns one entry per group.
 */
function createCostMaps(formMaps, base) {
    const rendered = formMaps.map((forms) => joinCharMap(forms));
    const uniqueMaps = [...new Set(rendered)].sort().filter((a) => !!a);
    return [...groupsOfN(uniqueMaps, MAPS_PER_COST_ENTRY)].map((group) => ({ ...base, map: group.join('|') }));
}

/**
 * Collect the case variants of a letter, using both the locale-independent
 * and the locale-aware conversions, including the round trips
 * upper -> lower and lower -> upper.
 *
 * @param letter - the string to convert.
 * @param locale - locale used for the locale-aware conversions.
 * @returns a `Set` of the distinct variants (always contains `letter`).
 */
function calcCapitalizationForms(letter, locale) {
    const forms = new Set();
    forms.add(letter);
    forms.add(letter.toUpperCase());
    forms.add(letter.toLowerCase());
    forms.add(letter.toLocaleUpperCase(locale));
    forms.add(letter.toLocaleLowerCase(locale));
    forms.add(letter.toLocaleUpperCase(locale).toLocaleLowerCase(locale));
    forms.add(letter.toLocaleLowerCase(locale).toLocaleUpperCase(locale));
    return forms;
}

/**
 * Group letters by the value `removeAccents` (from ./textUtils) returns for them.
 *
 * Each group holds that base value plus every letter that maps to it.
 *
 * @param letters - iterable of letters.
 * @returns the groups that contain more than one member.
 */
function calcAccentForms(letters) {
    const forms = new Map();
    function getForm(letter) {
        const f = forms.get(letter);
        if (f)
            return f;
        const s = new Set();
        forms.set(letter, s);
        return s;
    }
    for (const letter of letters) {
        const base = (0, textUtils_1.removeAccents)(letter);
        const formCollection = getForm(base);
        formCollection.add(base);
        formCollection.add(letter);
        // addAccents(base, accents, formCollection);
    }
    return [...forms.values()].filter((s) => s.size > 1);
}

/**
 * Render a collection of strings as one map string: the values are sorted and
 * concatenated, and any value longer than one UTF-16 code unit is wrapped in
 * parentheses.
 *
 * @param values - iterable of strings.
 * @returns the concatenated map string.
 */
function joinCharMap(values) {
    return [...values]
        .sort()
        .map((a) => (a.length > 1 ? `(${a})` : a))
        .join('');
}

/**
 * For every accent group, collect the capitalization forms of all its members.
 *
 * Uses `pipe`, `opMap` and `opConcatMap` from @cspell/cspell-pipe/sync
 * (presumably lazy map / concat-map over iterables — confirm against that package).
 *
 * @param accentForms - iterable of accent groups (iterables of letters).
 * @param locale - locale used for the case conversions.
 * @returns an array with one `Set` per accent group.
 */
function calcCrossAccentCapsMap(accentForms, locale) {
    function calc(form) {
        return new Set((0, sync_1.pipe)(form, (0, sync_1.opConcatMap)((letter) => calcCapitalizationForms(letter, locale))));
    }
    const values = (0, sync_1.pipe)(accentForms, (0, sync_1.opMap)(calc));
    return [...values];
}

// function addAccents(cleanLetter: string, accents: Iterable<string>, collection: Set<string>) {
//     for (const accent of accents) {
//         collection.add(applyAccent(cleanLetter, accent));
//     }
// }
// function applyAccent(letter: string, accent: string): string {
//     const withAccent = (letter + accent).normalize('NFC');
//     return removeLooseAccents(withAccent);
// }

/**
 * Collect the `remove` and `attach` values of every substitution reachable
 * through `fxm.values()` -> `substitutionSets.values()` -> `substitutions`.
 *
 * @param fxm - object exposing `values()` (e.g. the PFX or SFX entries), or a falsy value.
 * @returns `undefined` when `fxm` is falsy, otherwise an array of the collected values.
 */
function extractFxLetters(fxm) {
    if (!fxm)
        return undefined;
    const substitutions = (0, sync_1.pipe)(fxm.values(), (0, sync_1.opConcatMap)((f) => f.substitutionSets.values()), (0, sync_1.opConcatMap)((s) => s.substitutions));
    const partials = (0, sync_1.pipe)(substitutions, (0, sync_1.opConcatMap)((sub) => [sub.remove, sub.attach]));
    return [...partials];
}

/**
 * Split an iterable into consecutive arrays of `n` items; the last array may
 * be shorter. Nothing is yielded for an empty input.
 *
 * @param values - iterable to split.
 * @param n - size at which a group is emitted.
 */
function* groupsOfN(values, n) {
    let buffer = [];
    for (const item of values) {
        buffer.push(item);
        if (buffer.length >= n) {
            yield buffer;
            buffer = [];
        }
    }
    if (buffer.length) {
        yield buffer;
    }
}
//# sourceMappingURL=affToDicInfo.js.map