UNPKG

hunspell-reader

Version:

A library for reading Hunspell Dictionary Files

github.com/streetsidesoftware/cspell/tree/main/packages/hunspell-reader

streetsidesoftware/cspell

147 lines • 5.51 kB

JavaScript

"use strict";
// Module-interop helpers. Each one is reused from `this` when a definition is
// already present there; otherwise the local fallback below is installed.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) return mod;
    var result = {};
    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
    __setModuleDefault(result, mod);
    return result;
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.createMatchingWordsFilter = exports.IterableHunspellReader = void 0;
const fs = __importStar(require("fs/promises"));
const gensequence_1 = require("gensequence");
const iconv_lite_1 = require("iconv-lite");
const affReader_1 = require("./affReader");
const util_1 = require("./util");
// Encoding handed to the .aff parser when the file is first opened.
const defaultEncoding = 'UTF-8';
/**
 * Lazily iterable view over a Hunspell dictionary: the raw `.dic` entries
 * combined with the affix rules object (`src.aff`) that expands them.
 */
class IterableHunspellReader {
    /**
     * @param src - object carrying `aff` (the affix rules) and `dic` (the list of .dic entry lines).
     */
    constructor(src) {
        this.src = src;
        this.aff = src.aff;
    }
    /** The raw .dic entry lines backing this reader. */
    get dic() {
        return this.src.dic;
    }
    /** Reads `maxSuffixDepth` from the affix rules object. */
    get maxDepth() {
        return this.aff.maxSuffixDepth;
    }
    /** Writes `maxSuffixDepth` on the affix rules object. */
    set maxDepth(value) {
        this.aff.maxSuffixDepth = value;
    }
    /** How many .dic entries there are. */
    get size() {
        return this.src.dic.length;
    }
    /**
     * Sequence of the .dic entries, each split at the first `/` into a
     * `word` and its `rules` part (`undefined` when the line has no `/`).
     * `prefixes` and `suffixes` start out as empty arrays.
     *
     * @internal
     */
    dicWordsSeq() {
        const { genSequence } = gensequence_1;
        const toEntry = (line) => {
            const parts = line.split('/', 2);
            return { word: parts[0], rules: parts[1], prefixes: [], suffixes: [] };
        };
        return genSequence(this.src.dic).map(toEntry);
    }
    /**
     * Walks the root words of the dictionary (the .dic words with no
     * affix rules applied).
     */
    iterateRootWords() {
        return this.seqRootWords();
    }
    /**
     * Walks the words produced by `seqWords()`.
     */
    iterateWords() {
        return this.seqWords();
    }
    /**
     * Default iteration yields the stand-alone words, i.e. the result of
     * `wholeWords()`: entries flagged as forbidden or compound-only are
     * left out.
     */
    [Symbol.iterator]() {
        return this.wholeWords();
    }
    /**
     * Flattened sequence of every affix-expanded entry in the dictionary.
     *
     * @param tapPreApplyRules -- optional callback invoked for each .dic entry before its rules
     * are applied; together with `size` it can be used to report progress.
     * @param maxDepth -- forwarded unchanged to the affix rules object.
     */
    seqAffWords(tapPreApplyRules, maxDepth) {
        const groups = this.seqTransformDictionaryEntries(tapPreApplyRules, maxDepth);
        return groups.concatMap((group) => group);
    }
    /**
     * Sequence with one element per .dic entry, each element being whatever
     * `aff.applyRulesToDicEntry` returns for that entry.
     *
     * @param tapPreApplyRules -- optional callback invoked as `(entry, index)` before the rules
     * are applied to the entry; the index starts at 0 and grows by one per entry.
     * @param maxDepth -- forwarded unchanged to `aff.applyRulesToDicEntry`.
     */
    seqTransformDictionaryEntries(tapPreApplyRules, maxDepth) {
        const { genSequence } = gensequence_1;
        const lines = genSequence(this.src.dic);
        let index = 0;
        let observed = lines;
        if (tapPreApplyRules) {
            observed = lines.map((line) => {
                tapPreApplyRules(line, index++);
                return line;
            });
        }
        return observed.map((line) => this.aff.applyRulesToDicEntry(line, maxDepth));
    }
    /**
     * Sequence of all words after affix expansion, in .dic order, run through
     * the filter from `createMatchingWordsFilter()`. No flag-based filtering
     * happens here, so forbidden and compound-only words ARE INCLUDED.
     *
     * @internal
     */
    seqWords() {
        const expanded = this.seqAffWords();
        const words = expanded.map(({ word }) => word);
        return words.filter(createMatchingWordsFilter());
    }
    /**
     * Sequence restricted to stand-alone words: entries whose flags mark them
     * as forbidden or as only allowed in compounds are skipped.
     */
    wholeWords() {
        const isStandAlone = (entry) => !(entry.flags.isForbiddenWord || entry.flags.isOnlyAllowedInCompound);
        const words = this.seqAffWords()
            .filter(isStandAlone)
            .map(({ word }) => word);
        return words.filter(createMatchingWordsFilter());
    }
    /**
     * Sequence of just the `word` part of every .dic entry.
     *
     * @internal
     */
    seqRootWords() {
        return this.dicWordsSeq().map(({ word }) => word);
    }
    /**
     * Builds a reader from a pair of files on disk. The .dic bytes are
     * decoded with the encoding named by the parsed affix info (`affInfo.SET`).
     *
     * @param affFile - path to aff file.
     * @param dicFile - path to dic file.
     * @returns IterableHunspellReader
     */
    static async createFromFiles(affFile, dicFile) {
        const aff = await (0, affReader_1.parseAffFileToAff)(affFile, defaultEncoding);
        const rawDic = await fs.readFile(dicFile);
        const text = (0, iconv_lite_1.decode)(rawDic, aff.affInfo.SET);
        // The first line is the count of entries, not a word, so it is discarded.
        const [, ...entryLines] = text.split('\n');
        const dic = entryLines
            .map((line) => line.trim())
            .filter((line) => line !== '');
        return new IterableHunspellReader({ aff, dic });
    }
}
exports.IterableHunspellReader = IterableHunspellReader;
/**
 * Creates the word filter used by `seqWords()` and `wholeWords()`. It is built
 * by `filterOrderedList` from a "values differ" comparison.
 * NOTE(review): presumably this drops adjacent repeated words -- confirm against ./util.
 */
function createMatchingWordsFilter() {
    const differs = (left, right) => left !== right;
    return (0, util_1.filterOrderedList)(differs);
}
exports.createMatchingWordsFilter = createMatchingWordsFilter;
//# sourceMappingURL=IterableHunspellReader.js.map