hunspell-reader
Version:
A library for reading Hunspell Dictionary Files
123 lines • 4.29 kB
JavaScript
import * as fs from 'node:fs/promises';
import { genSequence } from 'gensequence';
import pkgIconvLite from 'iconv-lite';
import { parseAffFileToAffLegacy } from './affReader.js';
import { filterOrderedList } from './util.js';
// `decode` is taken from iconv-lite's default export rather than imported by name
// (presumably for CommonJS/ESM interop -- confirm before switching to a named import).
const { decode } = pkgIconvLite;
// Encoding handed to the .aff parser by `createFromFiles`.
// eslint-disable-next-line unicorn/text-encoding-identifier-case
const defaultEncoding = 'UTF-8';
export class IterableHunspellReaderLegacy {
    src;
    aff;
    /**
     * @param src - object carrying the affix data (`aff`) and the dictionary lines (`dic`).
     */
    constructor(src) {
        this.src = src;
        const { aff } = src;
        this.aff = aff;
    }
    /** The `.dic` entries held by the source object. */
    get dic() {
        const { dic } = this.src;
        return dic;
    }
    /** The suffix depth limit, stored on the affix object as `maxSuffixDepth`. */
    get maxDepth() {
        const { maxSuffixDepth } = this.aff;
        return maxSuffixDepth;
    }
    set maxDepth(value) {
        this.aff.maxSuffixDepth = value;
    }
    /** the number of .dic entries */
    get size() {
        const entries = this.src.dic;
        return entries.length;
    }
    /**
     * Splits every .dic entry at the first `/` into a word and its rules.
     * `rules` is `undefined` for an entry without a `/`; `prefixes` and `suffixes` start out empty.
     *
     * @internal
     */
    dicWordsSeq() {
        const toWordInfo = (line) => {
            const parts = line.split('/', 2);
            return { word: parts[0], rules: parts[1], prefixes: [], suffixes: [] };
        };
        return genSequence(this.src.dic).map(toWordInfo);
    }
    /**
     * iterates through the root words of the dictionary
     */
    iterateRootWords() {
        const rootWords = this.seqRootWords();
        return rootWords;
    }
    /**
     * iterates through the words produced by `seqWords()`
     */
    iterateWords() {
        const words = this.seqWords();
        return words;
    }
    /**
     * Iterating the reader yields the stand alone words, i.e. the result of `wholeWords()`:
     * words come in .dic order after the transformations have been applied, with Forbidden
     * and CompoundOnly words filtered out.
     */
    [Symbol.iterator]() {
        const standAloneWords = this.wholeWords();
        return standAloneWords;
    }
    /**
     * create one flat sequence of the words generated from all dictionary entries.
     *
     * @param tapPreApplyRules -- optional function to be called before rules are applied to a word.
     *   It is mostly used for monitoring progress in combination with `size`.
     * @param maxDepth -- optional value forwarded unchanged to `seqTransformDictionaryEntries`.
     */
    seqAffWords(tapPreApplyRules, maxDepth) {
        const wordsPerEntry = this.seqTransformDictionaryEntries(tapPreApplyRules, maxDepth);
        return wordsPerEntry.concatMap((words) => words);
    }
    /**
     * create a sequence holding, for each dictionary entry, the result of applying the affix rules to it.
     *
     * @param tapPreApplyRules -- optional function called with `(entry, index)` before the rules are
     *   applied to that entry. It is mostly used for monitoring progress in combination with `size`.
     * @param maxDepth -- optional value passed as second argument to `aff.applyRulesToDicEntry`.
     */
    seqTransformDictionaryEntries(tapPreApplyRules, maxDepth) {
        const entries = genSequence(this.src.dic);
        const applyRules = (entry) => this.aff.applyRulesToDicEntry(entry, maxDepth);
        if (!tapPreApplyRules) {
            return entries.map(applyRules);
        }
        // The running index is shared by every callback created below.
        let index = 0;
        const notify = (entry) => {
            tapPreApplyRules(entry, index++);
            return entry;
        };
        return entries.map(notify).map(applyRules);
    }
    /**
     * Sequence of all the words in the dictionary, in .dic order after the transformations
     * have been applied. Forbidden and CompoundOnly ARE INCLUDED.
     * The words are passed through the filter built by `createMatchingWordsFilter()`.
     *
     * @internal
     */
    seqWords() {
        const words = this.seqAffWords().map((affWord) => affWord.word);
        return words.filter(createMatchingWordsFilter());
    }
    /**
     * Returns an iterable that will only return stand alone words.
     */
    wholeWords() {
        // A word stands alone when it is neither forbidden nor restricted to compounds.
        const isStandAlone = ({ flags }) => !(flags.isForbiddenWord || flags.isOnlyAllowedInCompound);
        const standAlone = this.seqAffWords().filter(isStandAlone);
        const words = standAlone.map((affWord) => affWord.word);
        return words.filter(createMatchingWordsFilter());
    }
    /**
     * Sequence of the word part of every .dic entry.
     *
     * @internal
     */
    seqRootWords() {
        const entries = this.dicWordsSeq();
        return entries.map((entry) => entry.word);
    }
    /**
     * Builds a reader from a pair of Hunspell files. The .aff file is parsed with the default
     * encoding; the .dic file is decoded with the `SET` value found in the parsed affix info.
     *
     * @param affFile - path to aff file.
     * @param dicFile - path to dic file.
     * @returns a promise resolving to a new IterableHunspellReaderLegacy
     */
    static async createFromFiles(affFile, dicFile) {
        const aff = await parseAffFileToAffLegacy(affFile, defaultEncoding);
        const rawDic = await fs.readFile(dicFile);
        const text = decode(rawDic, aff.affInfo.SET);
        // The first entry is the count of entries, so it is skipped.
        const [, ...entryLines] = text.split('\n');
        const dic = [];
        for (const entryLine of entryLines) {
            const entry = entryLine.trim();
            // Lines that are empty after trimming are dropped.
            if (entry) {
                dic.push(entry);
            }
        }
        return new IterableHunspellReaderLegacy({ aff, dic });
    }
}
/**
 * Builds the word filter used by the reader's word sequences.
 *
 * The filter is whatever `filterOrderedList` returns for a strict-inequality comparison of
 * its two arguments (presumably it drops a word equal to its neighbour in an ordered list --
 * confirm against `filterOrderedList` in ./util.js).
 */
export function createMatchingWordsFilter() {
    const isDifferent = (left, right) => left !== right;
    return filterOrderedList(isDifferent);
}
//# sourceMappingURL=IterableHunspellReaderLegacy.js.map