UNPKG

js-mdict

Version:

mdict (*.mdx, *.mdd) file reader. Licensed under AGPL-3.0 for better community cooperation and commercial value protection.

55 lines (54 loc) 2.71 kB
import MdictBase, { KeyWordItem, KeyInfoItem, MDictOptions } from './mdict-base.js';
/**
 * Declaration of the `Mdict` reader class, which extends `MdictBase`.
 *
 * Only signatures are visible here; behavioral notes below are carried over
 * from the original comments and are hedged where the declaration alone
 * cannot confirm them.
 */
export declare class Mdict extends MdictBase {
    /**
     * @param fname dictionary file name (presumably a path to an *.mdx / *.mdd file — confirm against `MdictBase`)
     * @param options optional subset of `MDictOptions`
     */
    constructor(fname: string, options?: Partial<MDictOptions>);
    /**
     * Look up the key-word item for a word.
     *
     * Per the original note, the returned item carries the location of the
     * word's record (`recordStartOffset`), which should be an offset relative
     * to the unpacked record data.
     *
     * @param word the target word or phrase
     * @param isAssociate NOTE(review): undocumented in the original; its effect is not visible from this declaration — confirm in the implementation
     * @returns the matching `KeyWordItem`, or `undefined` (presumably when nothing matches — confirm)
     */
    lookupKeyBlockByWord(word: string, isAssociate?: boolean): KeyWordItem | undefined;
    /**
     * Locate the record (meaning) buffer for a key-word item.
     *
     * As described by the original note:
     *  - `item.recordStartOffset` identifies the record block info to use;
     *  - the record block info yields the raw record buffer, which must be
     *    decrypted and decompressed;
     *  - the decompressed block contains the meanings, and the slice for this
     *    item is computed as:
     *      const start = item.recordStartOffset - recordBlockInfo.unpackAccumulatorOffset;
     *      const end = item.recordEndOffset - recordBlockInfo.unpackAccumulatorOffset;
     *  - the final meaning buffer is `unpackRecordBlockBuff[start, end]`.
     *
     * @param item the key-word item whose record should be read
     * @returns the bytes of the record for `item`
     */
    lookupRecordByKeyBlock(item: KeyWordItem): Uint8Array<ArrayBuffer>;
    /**
     * Decode a single key block, selected by its key block id, and return the
     * keys it contains.
     *
     * The result is a partial key list: it holds only the keys of that one
     * block, not of the whole dictionary.
     *
     * @param keyInfoId key block id
     * @returns the key-word items decoded from that key block
     */
    lookupPartialKeyBlockListByKeyInfoId(keyInfoId: number): KeyWordItem[];
    /**
     * Find the key block nearest to the given word.
     *
     * @param word the phrase to search for
     * @param keyInfoList key info list to search; NOTE(review): the fallback used when omitted is not visible here — confirm in the implementation
     * @returns a number identifying the key block (presumably an id/index usable with `lookupPartialKeyBlockListByKeyInfoId` — confirm)
     */
    lookupKeyInfoByWord(word: string, keyInfoList?: KeyInfoItem[]): number;
    /**
     * Private member; its signature is erased from the declaration output.
     * NOTE(review): name suggests buffer decompression — confirm in the implementation.
     */
    private decompressBuff;
    /**
     * Private member; its signature is erased from the declaration output.
     * Per the original note, it finds the record block info in which a given
     * record start offset is located (the original documents one numeric
     * parameter, `recordStart`, the record start offset).
     */
    private reduceRecordBlockInfo;
    /**
     * NOTE(review): presumably releases resources held by the reader (e.g. the
     * open dictionary file) — not verifiable from this declaration; confirm.
     */
    close(): void;
}
/**
 * Design note (translated from the original Chinese comment):
 *
 * After a series of tests, mdx files were found to have significant problems
 * with key ordering, including:
 *  1. Letter case: orderings such as a-zA-Z and aA-zZ both occur.
 *  2. Multiple languages: English and Chinese keys end up being compared with
 *     each other; in general English should sort before Chinese.
 *  3. Minority (less common) languages.
 *
 * Any of these cases may occur, and the ordering cannot be reproduced from the
 * settings in the dictionary header, so the internal keyInfoList cannot be
 * used for fast indexing. On modern hardware, iterating over all entries
 * still performs well, so the current strategy is to read all entries and
 * sort them internally.
 */
export default Mdict;