UNPKG

hunspell-reader

Version:
145 lines 5.46 kB
import { opConcatMap, opMap, pipe } from '@cspell/cspell-pipe/sync';
import { removeAccents, toRange } from './textUtils.js';

/**
 * Build dictionary information (alphabet, accents and suggestion edit costs)
 * from parsed Hunspell aff data.
 *
 * @param aff - parsed aff data; the fields read here are MAP, TRY, KEY, REP,
 *   ICONV, OCONV, PFX and SFX, all of which may be absent.
 * @param locale - locale passed to the locale-aware case conversions.
 * @returns the extracted alphabet info merged with `suggestionEditCosts`, with
 *   `alphabet` and `accents` converted through `toRange`.
 */
export function affToDicInfo(aff, locale) {
    const alphabetInfo = extractAlphabet(aff, locale);
    return {
        ...alphabetInfo,
        ...extractSuggestionEditCosts(aff, alphabetInfo),
        locale,
        // NOTE(review): the meaning of the second argument (5) is defined by
        // `toRange` in textUtils — confirm there before changing it.
        alphabet: toRange(alphabetInfo.alphabet, 5),
        accents: toRange([...alphabetInfo.accents].sort().join('')),
    };
}

/**
 * Collect every letter that appears in the aff data, together with its
 * lower-case and upper-case forms for the given locale.
 *
 * @returns `{ locale, alphabet, accents }` where `alphabet` is a sorted string
 *   of unique letters (Unicode category L only) and `accents` is the set of
 *   combining marks (Unicode category M) found in the NFD form of the alphabet.
 */
function extractAlphabet(aff, locale) {
    const sources = [
        aff.MAP,
        aff.TRY,
        aff.KEY,
        aff.REP?.flatMap((rep) => [rep.match, rep.replaceWith]),
        aff.ICONV?.flatMap((cov) => [cov.from, cov.to]),
        aff.OCONV?.flatMap((cov) => [cov.from, cov.to]),
        extractFxLetters(aff.PFX),
        extractFxLetters(aff.SFX),
    ];
    const setOfLetters = new Set(
        sources
            .filter(isDefined)
            .flat()
            .map((a) => a.normalize())
            // Spread each string into its code points and include both case variants.
            .flatMap((a) => [...a, ...a.toLocaleLowerCase(locale), ...a.toLocaleUpperCase(locale)])
            .map((a) => a.trim())
            .filter((a) => !!a),
    );
    // Drop everything that is not a letter (digits, punctuation, ...).
    const alphabet = [...setOfLetters].sort().join('').replaceAll(/\P{L}/gu, '');
    // Decompose and keep only the marks: these are the accents used by the alphabet.
    const accents = new Set(alphabet.normalize('NFD').replaceAll(/\P{M}/gu, ''));
    return { locale, alphabet, accents };
}

/** @returns true when `a` is not `undefined` (note: `null` passes). */
function isDefined(a) {
    return a !== undefined;
}

/**
 * Gather all suggestion edit cost maps: capitalization/accent replacements
 * derived from the alphabet, followed by the aff MAP and REP replacements.
 *
 * @returns an object with a single `suggestionEditCosts` array.
 */
function extractSuggestionEditCosts(aff, alphaInfo) {
    const suggestionEditCosts = [];
    suggestionEditCosts.push(
        ...calcCapsAndAccentReplacements(alphaInfo),
        ...calcAffMapReplacements(aff),
        ...calcAffRepReplacements(aff),
    );
    return {
        suggestionEditCosts,
    };
}

/**
 * Turn the aff MAP entries into a single cost map with a replace cost of 1.
 *
 * @returns an empty array when `aff.MAP` is missing, otherwise a one-element array.
 */
function calcAffMapReplacements(aff) {
    if (!aff.MAP)
        return [];
    // Sort a copy: Array.prototype.sort works in place and would otherwise
    // reorder the caller's `aff.MAP` as a side effect.
    const map = [...aff.MAP].sort().join('|');
    return [{ map, replace: 1, description: 'Hunspell Aff Map' }];
}

/**
 * Turn the aff REP entries (match / replaceWith pairs) into cost maps with a
 * replace cost of 75.
 *
 * @returns an empty array when `aff.REP` is missing.
 */
function calcAffRepReplacements(aff) {
    if (!aff.REP)
        return [];
    return createCostMaps(
        aff.REP.map((rep) => [rep.match, rep.replaceWith]),
        { map: '', replace: 75, description: 'Hunspell Replace Map' },
    );
}

/**
 * Build the cost maps for changing capitalization (cost 1), changing accents
 * (cost 1), and changing both at once (cost 2) for every letter of the alphabet.
 */
function calcCapsAndAccentReplacements(alphaInfo) {
    const { locale, alphabet } = alphaInfo;
    const letters = [...alphabet];
    const capForms = letters.map((letter) => calcCapitalizationForms(letter, locale));
    const accentForms = calcAccentForms(letters);
    const mapCrossAccent = calcCrossAccentCapsMap(accentForms, locale);
    return [
        ...createCostMaps(capForms, { map: '', replace: 1, description: 'Capitalization change.' }),
        ...createCostMaps(accentForms, { map: '', replace: 1, description: 'Replace Accents' }),
        ...createCostMaps(mapCrossAccent, { map: '', replace: 2, description: 'Capitalization and Accent change.' }),
    ];
}

/**
 * Convert collections of interchangeable forms into cost map entries.
 *
 * Each collection is joined into one string by `joinCharMap`; the unique,
 * non-empty results are sorted and packed, at most 6 per entry, into `map`
 * strings separated by `|`.
 *
 * @param formMaps - array of iterables of strings.
 * @param base - properties copied into every resulting entry (its `map` is overwritten).
 */
function createCostMaps(formMaps, base) {
    const forms = formMaps.map((forms) => joinCharMap(forms));
    const mapValues = [...new Set(forms)].sort().filter((a) => !!a);
    return [...groupsOfN(mapValues, 6)].map((mapValues) => ({ ...base, map: mapValues.join('|') }));
}

/**
 * Collect the distinct case variants of `letter`, using both the
 * locale-independent and the locale-aware conversions, including the
 * round trips upper→lower and lower→upper.
 *
 * @returns a Set of strings that always contains `letter` itself.
 */
function calcCapitalizationForms(letter, locale) {
    const forms = new Set();
    forms.add(letter);
    forms.add(letter.toUpperCase());
    forms.add(letter.toLowerCase());
    forms.add(letter.toLocaleUpperCase(locale));
    forms.add(letter.toLocaleLowerCase(locale));
    forms.add(letter.toLocaleUpperCase(locale).toLocaleLowerCase(locale));
    forms.add(letter.toLocaleLowerCase(locale).toLocaleUpperCase(locale));
    return forms;
}

/**
 * Group letters by the result of `removeAccents(letter)`.
 *
 * @returns the groups (Sets containing the base form and the letters mapping
 *   to it) that hold more than one member.
 */
function calcAccentForms(letters) {
    const forms = new Map();
    // Fetch the Set registered for `letter`, creating it on first use.
    function getForm(letter) {
        const f = forms.get(letter);
        if (f)
            return f;
        const s = new Set();
        forms.set(letter, s);
        return s;
    }
    for (const letter of letters) {
        const base = removeAccents(letter);
        const formCollection = getForm(base);
        formCollection.add(base);
        formCollection.add(letter);
        // addAccents(base, accents, formCollection);
    }
    // A group of size 1 has no alternative form, so it is not useful as a map.
    return [...forms.values()].filter((s) => s.size > 1);
}

/**
 * Sort the values and concatenate them into one string; values longer than
 * one UTF-16 code unit are wrapped in parentheses.
 */
function joinCharMap(values) {
    return [...values]
        .sort()
        .map((a) => (a.length > 1 ? '(' + a + ')' : a))
        .join('');
}

/**
 * For each accent group, build the Set of all capitalization forms of all
 * letters in that group.
 *
 * @returns an array with one Set per accent group.
 */
function calcCrossAccentCapsMap(accentForms, locale) {
    function calc(form) {
        return new Set(pipe(form, opConcatMap((letter) => calcCapitalizationForms(letter, locale))));
    }
    const values = pipe(accentForms, opMap(calc));
    return [...values];
}

// function addAccents(cleanLetter: string, accents: Iterable<string>, collection: Set<string>) {
//     for (const accent of accents) {
//         collection.add(applyAccent(cleanLetter, accent));
//     }
// }

// function applyAccent(letter: string, accent: string): string {
//     const withAccent = (letter + accent).normalize('NFC');
//     return removeLooseAccents(withAccent);
// }

/**
 * Collect the `remove` and `attach` strings of every substitution reachable
 * from a PFX/SFX collection.
 *
 * @param fxm - object exposing `values()`; each value has a `substitutionSets`
 *   collection (also exposing `values()`) whose items carry `substitutions`.
 * @returns `undefined` when `fxm` is falsy, otherwise an array of the collected values.
 */
function extractFxLetters(fxm) {
    if (!fxm)
        return undefined;
    const substitutions = pipe(
        fxm.values(),
        opConcatMap((f) => f.substitutionSets.values()),
        opConcatMap((s) => s.substitutions),
    );
    const partials = pipe(substitutions, opConcatMap((sub) => [sub.remove, sub.attach]));
    return [...partials];
}

/**
 * Split an iterable into consecutive arrays of `n` items; the last array may
 * be shorter. Nothing is yielded for an empty input.
 */
function* groupsOfN(values, n) {
    let buffer = [];
    for (const item of values) {
        buffer.push(item);
        if (buffer.length >= n) {
            yield buffer;
            buffer = [];
        }
    }
    if (buffer.length) {
        yield buffer;
    }
}
//# sourceMappingURL=affToDicInfo.js.map