UNPKG

hunspell-reader

Version:

A library for reading Hunspell Dictionary Files

github.com/streetsidesoftware/cspell/tree/main/packages/hunspell-reader

streetsidesoftware/cspell

123 lines • 4.29 kB

JavaScript

import * as fs from 'node:fs/promises';

import { genSequence } from 'gensequence';
import pkgIconvLite from 'iconv-lite';

import { parseAffFileToAffLegacy } from './affReader.js';
import { filterOrderedList } from './util.js';

const { decode } = pkgIconvLite;

// eslint-disable-next-line unicorn/text-encoding-identifier-case
const defaultEncoding = 'UTF-8';

/**
 * Iterable view over a Hunspell dictionary: the parsed `.aff` rules (`src.aff`)
 * combined with the raw `.dic` entries (`src.dic`).
 */
export class IterableHunspellReaderLegacy {
    src;
    aff;

    /**
     * @param src - object holding the parsed aff (`src.aff`) and the list of
     *   dictionary lines (`src.dic`).
     */
    constructor(src) {
        this.src = src;
        this.aff = src.aff;
    }

    /** the raw `.dic` entries */
    get dic() {
        return this.src.dic;
    }

    /** forwards to `aff.maxSuffixDepth` */
    set maxDepth(value) {
        this.aff.maxSuffixDepth = value;
    }

    get maxDepth() {
        return this.aff.maxSuffixDepth;
    }

    /** the number of .dic entries */
    get size() {
        return this.src.dic.length;
    }

    /**
     * Sequence of the `.dic` entries split into `word` and `rules` at the first `/`.
     * `rules` is `undefined` for entries without a `/`; `prefixes` and `suffixes`
     * always start out empty.
     *
     * @internal
     */
    dicWordsSeq() {
        return genSequence(this.src.dic).map((line) => {
            const [word, rules] = line.split('/', 2);
            return { word, rules, prefixes: [], suffixes: [] };
        });
    }

    /**
     * iterates through the root words of the dictionary
     */
    iterateRootWords() {
        return this.seqRootWords();
    }

    /**
     * iterates through the words produced by applying the aff rules, see `seqWords`.
     */
    iterateWords() {
        return this.seqWords();
    }

    /**
     * Iterator for all the words in the dictionary. The words are in the order found in the .dic after the
     * transformations have been applied. Forbidden and CompoundOnly words are filtered out.
     */
    [Symbol.iterator]() {
        return this.wholeWords();
    }

    /**
     * create an iterable sequence of the words in the dictionary.
     *
     * @param tapPreApplyRules -- optional function to be called before rules are applied to a word.
     *   It is mostly used for monitoring progress in combination with `size`.
     * @param maxDepth -- optional value forwarded to `aff.applyRulesToDicEntry`.
     */
    seqAffWords(tapPreApplyRules, maxDepth) {
        return this.seqTransformDictionaryEntries(tapPreApplyRules, maxDepth).concatMap((a) => a);
    }

    /**
     * create an iterable sequence with one element per .dic entry; each element is whatever
     * `aff.applyRulesToDicEntry` returns for that entry (`seqAffWords` flattens these).
     *
     * @param tapPreApplyRules -- optional function called as `tapPreApplyRules(dicEntry, index)`
     *   before rules are applied to an entry.
     *   It is mostly used for monitoring progress in combination with `size`.
     * @param maxDepth -- optional value forwarded to `aff.applyRulesToDicEntry`.
     */
    seqTransformDictionaryEntries(tapPreApplyRules, maxDepth) {
        const seq = genSequence(this.src.dic);
        let count = 0;
        const dicWords = tapPreApplyRules
            ? seq.map((dicWord) => {
                  // Notify the observer first, then pass the entry through untouched.
                  tapPreApplyRules(dicWord, count++);
                  return dicWord;
              })
            : seq;
        return dicWords.map((dicWord) => this.aff.applyRulesToDicEntry(dicWord, maxDepth));
    }

    /**
     * Iterator for all the words in the dictionary. The words are in the order found in the .dic after the
     * transformations have been applied. Forbidden and CompoundOnly ARE INCLUDED.
     *
     * @internal
     */
    seqWords() {
        return this.seqAffWords()
            .map((w) => w.word)
            .filter(createMatchingWordsFilter());
    }

    /**
     * Returns an iterable that will only return stand alone words.
     */
    wholeWords() {
        return (
            this.seqAffWords()
                // Filter out words that are forbidden or only allowed in Compounds.
                .filter((w) => !w.flags.isForbiddenWord && !w.flags.isOnlyAllowedInCompound)
                .map((w) => w.word)
                .filter(createMatchingWordsFilter())
        );
    }

    /**
     * Sequence of the `word` part of every .dic entry, without applying any aff rules.
     *
     * @internal
     */
    seqRootWords() {
        return this.dicWordsSeq().map((w) => w.word);
    }

    /**
     *
     * @param affFile - path to aff file.
     * @param dicFile - path to dic file.
     * @returns IterableHunspellReader
     */
    static async createFromFiles(affFile, dicFile) {
        const aff = await parseAffFileToAffLegacy(affFile, defaultEncoding);
        const buffer = await fs.readFile(dicFile);
        // Fall back to the encoding used to read the .aff file when it declares no
        // (or an empty) SET. `||` is intentional: an empty string is not a usable encoding name.
        const encoding = aff.affInfo.SET || defaultEncoding;
        const dicFileContent = decode(buffer, encoding);
        const dic = dicFileContent
            .split('\n')
            .slice(1) // The first entry is the count of entries.
            .map((a) => a.trim())
            .filter((line) => !!line);
        return new IterableHunspellReaderLegacy({ aff, dic });
    }
}

/**
 * Creates the filter applied to the word sequences above.
 * It is built with `filterOrderedList` and a "not equal" comparison.
 * NOTE(review): presumably this drops a word equal to the one just before it - confirm against `./util.js`.
 */
export function createMatchingWordsFilter() {
    return filterOrderedList((a, b) => a !== b);
}
//# sourceMappingURL=IterableHunspellReaderLegacy.js.map