js-mdict

Version:

mdict (*.mdx, *.mdd) file reader. Licensed under AGPL-3.0 for better community cooperation and commercial value protection.

github.com/terasum/js-mdict

terasum/js-mdict

55 lines (54 loc) • 2.71 kB

TypeScript

View Raw

import MdictBase, { KeyWordItem, KeyInfoItem, MDictOptions } from './mdict-base.js';
/**
 * Ambient declaration of the `Mdict` class, which extends `MdictBase` and
 * exposes lookups from a word to its key entry and from a key entry to its
 * record bytes.
 */
export declare class Mdict extends MdictBase {
    /**
     * @param fname NOTE(review): presumably the dictionary file name/path — confirm against `MdictBase`
     * @param options optional subset of `MDictOptions`
     */
    constructor(fname: string, options?: Partial<MDictOptions>);
    /**
     * Look up the key entry for a word.
     * Per the original notes, the entry carries the key word's record block
     * location (`recordStartOffset`), which should be an offset relative to
     * the unpacked record data.
     * @param word the target word phrase
     * @param isAssociate NOTE(review): meaning is not visible in this declaration — confirm against the implementation
     * @returns the matching `KeyWordItem`, or `undefined` (presumably when no entry matches — confirm)
     */
    lookupKeyBlockByWord(word: string, isAssociate?: boolean): KeyWordItem | undefined;
    /**
     * Locate the record (meaning) buffer for a key entry.
     * Per the original notes:
     *  - `item.recordStartOffset` indicates which record block info applies;
     *  - the record block info yields the raw record buffer, which must be
     *    decrypted and decompressed to obtain the whole block of meanings;
     *  - the meaning is then sliced out of the unpacked block using
     *      const start = item.recordStartOffset - recordBlockInfo.unpackAccumulatorOffset;
     *      const end = item.recordEndOffset - recordBlockInfo.unpackAccumulatorOffset;
     *    i.e. the final buffer is `unpackRecordBlockBuff[start, end]`.
     * @param item the key entry whose record should be read
     * @returns the bytes of the record for `item`
     */
    lookupRecordByKeyBlock(item: KeyWordItem): Uint8Array<ArrayBuffer>;
    /**
     * Decode a single key block, selected by key block id, into its key list.
     * The result contains only the keys of that block, not the whole dictionary.
     * @param keyInfoId key block id
     * @returns the partial key list decoded from that key block
     */
    lookupPartialKeyBlockListByKeyInfoId(keyInfoId: number): KeyWordItem[];
    /**
     * Find the key block nearest to a word.
     * @param word searching phrase
     * @param keyInfoList optional list of key info items to search
     *        (NOTE(review): the fallback used when omitted is not visible here — confirm)
     * @returns a number identifying the nearest key block
     *          (NOTE(review): presumably an index/id into the key info list — confirm)
     */
    lookupKeyInfoByWord(word: string, keyInfoList?: KeyInfoItem[]): number;
    /**
     * Private member; its signature is not part of this declaration.
     * NOTE(review): presumably decompresses a buffer, judging by the name — confirm.
     */
    private decompressBuff;
    /**
     * Find the record block info in which a given record start offset is located.
     * The original notes document one parameter, `recordStart` (the record
     * start offset); the signature itself is not part of this declaration.
     */
    private reduceRecordBlockInfo;
    /**
     * NOTE(review): presumably releases resources held by the reader — confirm against the implementation.
     */
    close(): void;
}
/**
 * Design note on keyword ordering.
 *
 * After a series of tests, mdx files were found to have significant keyword
 * ordering problems, including:
 *  1. Letter case: both `a-zA-Z` and `aA-zZ` style orderings occur.
 *  2. Multiple languages: English and Chinese keys must be compared with each
 *     other; normally English should sort before Chinese.
 *  3. Minority languages.
 * Any of these can occur and cannot be resolved through the settings in the
 * dictionary header, so the internal keyInfoList cannot be used for fast
 * indexing. On modern hardware, traversing all entries directly still performs
 * well, so the current strategy is to read every entry and sort them internally.
 */
export default Mdict;