hunspell-reader
Version:
A library for reading Hunspell Dictionary Files
147 lines • 5.51 kB
JavaScript
;
// Compiler-emitted helper: exposes property `k` of `m` on `o` under the name `k2`
// (`k2` defaults to `k`). An implementation already present on `this` is reused when available.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Reuse the source descriptor only when it is safe to copy as-is. Otherwise install an
// enumerable getter that reads `m[k]` on every access, so `o` always reflects the current value:
//   - `m` has no own descriptor for `k`;
//   - the descriptor is an accessor ("get" in desc) and `m` is not flagged `__esModule`;
//   - the descriptor is a data property that is writable or configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback used when `Object.create` is unavailable: copy the current value once (not live).
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Compiler-emitted helper: stores `value` as the `default` member of `target`.
// An implementation already present on `this` is reused when available.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        // Define `default` as an enumerable data property (non-writable, non-configurable
        // because those descriptor fields are left at their defaults).
        return function (target, value) {
            var descriptor = { enumerable: true, value: value };
            Object.defineProperty(target, "default", descriptor);
        };
    }
    // Fallback used when `Object.create` is unavailable: plain assignment.
    return function (target, value) {
        target["default"] = value;
    };
})();
// Compiler-emitted helper backing `import * as ns from '...'`.
// Objects flagged `__esModule` are returned unchanged; anything else is wrapped in a new
// namespace object that re-exposes its own enumerable keys (except "default") and
// carries the original value as `default`.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    var namespace = {};
    if (mod != null) {
        for (var key in mod) {
            if (key === "default") {
                continue;
            }
            // `for...in` also visits inherited keys; only own properties are re-exposed.
            if (!Object.prototype.hasOwnProperty.call(mod, key)) {
                continue;
            }
            __createBinding(namespace, mod, key);
        }
    }
    __setModuleDefault(namespace, mod);
    return namespace;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.createMatchingWordsFilter = exports.IterableHunspellReader = void 0;
const fs = __importStar(require("fs/promises"));
const gensequence_1 = require("gensequence");
const iconv_lite_1 = require("iconv-lite");
const affReader_1 = require("./affReader");
const util_1 = require("./util");
const defaultEncoding = 'UTF-8';
/**
 * Iterates over the words of a Hunspell dictionary: the raw `.dic` entries combined with the
 * affix rules held by `aff`.
 */
class IterableHunspellReader {
    /**
     * @param src - source data: `src.aff` is the affix object used to expand entries and
     *   `src.dic` is the list of `.dic` entry lines.
     */
    constructor(src) {
        this.src = src;
        this.aff = src.aff;
    }
    /** the raw `.dic` entry lines */
    get dic() {
        return this.src.dic;
    }
    /** forwards to `aff.maxSuffixDepth` */
    set maxDepth(value) {
        this.aff.maxSuffixDepth = value;
    }
    get maxDepth() {
        return this.aff.maxSuffixDepth;
    }
    /** the number of .dic entries */
    get size() {
        return this.src.dic.length;
    }
    /**
     * Splits every `.dic` line at the first `/` into `word` and `rules`
     * (`rules` is `undefined` when the line has no `/`). No affix rules are applied.
     *
     * @internal
     */
    dicWordsSeq() {
        return (0, gensequence_1.genSequence)(this.src.dic).map((line) => {
            const [word, rules] = line.split('/', 2);
            return { word, rules, prefixes: [], suffixes: [] };
        });
    }
    /**
     * iterates through the root words of the dictionary
     */
    iterateRootWords() {
        return this.seqRootWords();
    }
    /**
     * iterates through all the words of the dictionary, see `seqWords`.
     */
    iterateWords() {
        return this.seqWords();
    }
    /**
     * Iterator for all the words in the dictionary. The words are in the order found in the .dic after the
     * transformations have been applied. Forbidden and CompoundOnly words are filtered out.
     */
    [Symbol.iterator]() {
        return this.wholeWords();
    }
    /**
     * create an iterable sequence of the words in the dictionary.
     * This is the flattened form of `seqTransformDictionaryEntries`.
     *
     * @param tapPreApplyRules -- optional function to be called before rules are applied to a word.
     *   It is mostly used for monitoring progress in combination with `size`.
     * @param maxDepth -- optional value passed through to `aff.applyRulesToDicEntry`.
     */
    seqAffWords(tapPreApplyRules, maxDepth) {
        return this.seqTransformDictionaryEntries(tapPreApplyRules, maxDepth).concatMap((a) => a);
    }
    /**
     * create an iterable sequence with one element per `.dic` entry; each element is whatever
     * `aff.applyRulesToDicEntry` returns for that entry.
     *
     * @param tapPreApplyRules -- optional function called as `(entry, index)` right before the rules are
     *   applied to an entry. `index` is zero-based and restarts at 0 every time the returned sequence is iterated.
     *   It is mostly used for monitoring progress in combination with `size`.
     * @param maxDepth -- optional value passed through to `aff.applyRulesToDicEntry`.
     */
    seqTransformDictionaryEntries(tapPreApplyRules, maxDepth) {
        const dic = this.src.dic;
        // The counter is scoped to a single iteration pass. Keeping it outside of the lazy
        // pipeline would make a second pass over the same sequence report indexes that
        // continue from the first pass instead of starting over at 0.
        const entries = tapPreApplyRules
            ? {
                *[Symbol.iterator]() {
                    let count = 0;
                    for (const entry of dic) {
                        tapPreApplyRules(entry, count++);
                        yield entry;
                    }
                },
            }
            : dic;
        return (0, gensequence_1.genSequence)(entries).map((dicWord) => this.aff.applyRulesToDicEntry(dicWord, maxDepth));
    }
    /**
     * Iterator for all the words in the dictionary. The words are in the order found in the .dic after the
     * transformations have been applied. Forbidden and CompoundOnly ARE INCLUDED.
     *
     * @internal
     */
    seqWords() {
        return this.seqAffWords()
            .map((w) => w.word)
            .filter(createMatchingWordsFilter());
    }
    /**
     * Returns an iterable that will only return stand alone words.
     */
    wholeWords() {
        return (this.seqAffWords()
            // Filter out words that are forbidden or only allowed in Compounds.
            .filter((w) => !w.flags.isForbiddenWord && !w.flags.isOnlyAllowedInCompound)
            .map((w) => w.word)
            .filter(createMatchingWordsFilter()));
    }
    /**
     * The `word` part of every `.dic` entry, without applying any rules.
     *
     * @internal
     */
    seqRootWords() {
        return this.dicWordsSeq().map((w) => w.word);
    }
    /**
     * Loads a reader from a pair of Hunspell files. The `.dic` content is decoded using the
     * encoding reported by the parsed aff data (`aff.affInfo.SET`).
     *
     * @param affFile - path to aff file.
     * @param dicFile - path to dic file.
     * @returns IterableHunspellReader
     */
    static async createFromFiles(affFile, dicFile) {
        const aff = await (0, affReader_1.parseAffFileToAff)(affFile, defaultEncoding);
        const buffer = await fs.readFile(dicFile);
        const dicFileContent = (0, iconv_lite_1.decode)(buffer, aff.affInfo.SET);
        const dic = dicFileContent
            .split('\n')
            .slice(1) // The first entry is the count of entries.
            .map((a) => a.trim())
            .filter((line) => !!line);
        return new IterableHunspellReader({ aff, dic });
    }
}
exports.IterableHunspellReader = IterableHunspellReader;
/**
 * Builds the word filter used by the reader's word sequences.
 *
 * The filter is produced by `filterOrderedList` from a comparator that reports whether two
 * values differ — presumably to drop repeated adjacent words; confirm against `./util`.
 *
 * @returns the predicate created by `filterOrderedList`.
 */
function createMatchingWordsFilter() {
    const isDifferent = (a, b) => {
        return a !== b;
    };
    return (0, util_1.filterOrderedList)(isDifferent);
}
exports.createMatchingWordsFilter = createMatchingWordsFilter;
//# sourceMappingURL=IterableHunspellReader.js.map