/*
 * @iwater/mdict-ts
 * Version:
 * mdict (*.mdx, *.mdd) file reader
 * 282 lines (281 loc) • 10.9 kB
 * JavaScript
 */
"use strict";
// Defines a non-enumerable `__esModule: true` flag on the exports object (the
// marker ES-module-to-CommonJS transpilers emit for import interop).
Object.defineProperty(exports, "__esModule", { value: true });
// Declares the named export up front as `undefined`; the class itself is
// assigned to it at the end of the file.
exports.Mdict = void 0;
const util_1 = require("./util");
const mdict_parser_1 = require("./mdict-parser");
class Mdict extends mdict_parser_1.MDictParser {
constructor(file) {
super(file);
this.mutual_ticket = 0;
this.trail = null;
}
static async build(file) {
const instance = new Mdict(file);
await instance.init();
return instance;
}
reduce(arr, phrase) {
let len = arr.length;
if (len > 1) {
len = len >> 1;
return phrase > this.adaptKey(arr[len - 1].last_word) ?
this.reduce(arr.slice(len), phrase) :
this.reduce(arr.slice(0, len), phrase);
}
else {
return arr[0];
}
}
shrink(arr, phrase) {
let len = arr.length, sub;
if (len > 1) {
len = len >> 1;
let key = this.adaptKey(arr[len].word);
if (phrase < key) {
sub = arr.slice(0, len);
sub.pos = arr.pos;
}
else {
sub = arr.slice(len);
sub.pos = (arr.pos || 0) + len;
}
return this.shrink(sub, phrase);
}
else {
return (arr.pos || 0) + (phrase <= this.adaptKey(arr[0].word) ? 0 : 1);
}
}
loadKeys(kdx) {
if (this.cached_keys && this.cached_keys.pilot === kdx.first_word) {
return util_1.resolve(this.cached_keys.list);
}
else {
return this.slicedKeyBlock.then(input => {
let scanner = this.scan.init(input), list = Array(kdx.num_entries);
scanner.forward(kdx.offset);
scanner = scanner.readBlock(kdx.comp_size, kdx.decomp_size);
for (let i = 0; i < kdx.num_entries; i++) {
let offset = scanner.readNum();
list[i] = {
word: scanner.readText(),
offset
};
if (i > 0)
list[i - 1].size = offset - list[i - 1].offset;
}
this.cached_keys = {
list,
pilot: kdx.first_word
};
return list;
});
}
}
seekVanguard(phrase) {
phrase = this.adaptKey(phrase);
let kdx = this.reduce(this.KEY_INDEX, phrase);
if (phrase <= this.adaptKey(kdx.last_word)) {
let index = kdx.index - 1, prev;
while (prev = this.KEY_INDEX[index]) {
if (this.adaptKey(prev.last_word) !== this.adaptKey(kdx.last_word))
break;
kdx = prev;
index--;
}
}
return this.loadKeys(kdx).then(list => {
let idx = this.shrink(list, phrase);
while (idx > 0) {
if (this.adaptKey(list[--idx].word) !== this.adaptKey(phrase)) {
idx++;
break;
}
}
return [kdx, Math.min(idx, list.length - 1), list];
});
}
appendMore(word, list, nextKdx, expectedSize, filter, ticket) {
if (ticket !== this.mutual_ticket)
throw 'force terminated';
if (filter) {
if (this.trail.count < expectedSize && nextKdx && nextKdx.first_word.substr(0, word.length) === word) {
return this.loadKeys(nextKdx).then(more => {
this.trail.offset = 0;
this.trail.block = nextKdx.index;
Array.prototype.push.apply(list, more.filter(filter, this.trail));
return this.appendMore(word, list, this.KEY_INDEX[nextKdx.index + 1], expectedSize, filter, ticket);
});
}
else {
if (list.length === 0)
this.trail.exhausted = true;
return util_1.resolve(list);
}
}
else {
let shortage = expectedSize - list.length;
if (shortage > 0 && nextKdx) {
console.log('go next', nextKdx);
this.trail.block = nextKdx.index;
return this.loadKeys(nextKdx).then(more => {
this.trail.offset = 0;
this.trail.pos = Math.min(shortage, more.length);
Array.prototype.push.apply(list, more.slice(0, shortage));
console.log('$$ ' + more[shortage - 1], shortage);
return this.appendMore(word, list, this.KEY_INDEX[nextKdx.index + 1], expectedSize, filter, ticket);
});
}
else {
if (this.trail.pos > expectedSize) {
this.trail.pos = expectedSize;
}
list = list.slice(0, expectedSize);
this.trail.count = list.length;
this.trail.total += this.trail.count;
return util_1.resolve(list);
}
}
}
followUp() {
let kdx = this.KEY_INDEX[this.trail.block];
return this.loadKeys(kdx).then(list => {
return [kdx, Math.min(this.trail.offset + this.trail.pos, list.length - 1), list];
});
}
matchKeys(phrase, expectedSize = 0, follow) {
let filter;
expectedSize = Math.max(expectedSize, 10);
let str = phrase.trim().toLowerCase(), m = /([^?*]+)[?*]+/.exec(str), word;
if (m) {
word = m[1];
const wildcard = new RegExp('^' +
str.replace(/([.\\+\[^\]$()])/g, '\\$1')
.replace(/\*+/g, '.*')
.replace(/\?/g, '.') + '$'), tester = phrase[phrase.length - 1] === ' ' ? s => wildcard.test(s) : s => wildcard.test(s) && !/ /.test(s);
filter = (s, i) => {
if (this.trail.count < expectedSize && tester(s)) {
this.trail.count++;
this.trail.total++;
this.trail.pos = i + 1;
return true;
}
return false;
};
}
else {
word = phrase.trim();
}
if (this.trail && this.trail.phrase !== phrase)
follow = false;
if (follow && this.trail && this.trail.exhausted)
return util_1.resolve([]);
let startFrom = follow && this.trail ? this.followUp() : this.seekVanguard(word);
return startFrom.then(([kdx, idx, list]) => {
console.log('start ', kdx);
list = list.slice(idx);
this.trail = {
phrase: phrase,
block: kdx.index,
offset: idx,
pos: list.length,
count: 0,
total: follow ? this.trail && this.trail.total || 0 : 0
};
if (filter)
list = list.filter(filter, this.trail);
return this.appendMore(word, list, this.KEY_INDEX[kdx.index + 1], expectedSize, filter, ++this.mutual_ticket)
.then(result => {
if (this.trail.block === this.KEY_INDEX.length - 1) {
if (this.trail.offset + this.trail.pos >= this.KEY_INDEX[this.trail.block].num_entries) {
this.trail.exhausted = true;
console.log('EXHAUSTED!!!!');
}
}
console.log('trail: ', this.trail);
return result;
});
});
}
matchOffset(list, offset) {
return list.some(el => {
el.offset === offset ? list = [el] : false;
}) ? list : [];
}
readDefinition(input, block, offset) {
let scanner = this.scan.init(input).readBlock(block.comp_size, block.decomp_size);
scanner.forward(offset - block.decomp_offset);
return scanner.readText();
}
async redirects(definition) {
if ((definition.substring(0, 8) !== '@@@LINK=')) {
return definition;
}
const name = definition.substring(8);
let results = await this.mdx(name);
results = results.filter(({ word }) => word === name);
if (results.length > 0) {
return await this.getDefinition(results[0].offset);
}
return '';
}
read_object(input, block, keyInfo) {
if (input.byteLength > 0) {
let scanner = this.scan.init(input).readBlock(block.comp_size, block.decomp_size);
scanner.forward(keyInfo.offset - block.decomp_offset);
return scanner.readRaw(keyInfo.size);
}
else {
throw '* OUT OF FILE RANGE * ' + keyInfo + ' @offset=' + block.comp_offset;
}
}
findResource(keyInfo) {
let block = this.RECORD_BLOCK_TABLE.find(keyInfo.offset);
return this.read(block.comp_offset, block.comp_size).then(res => this.read_object(res, block, keyInfo));
}
mdx(query, offset) {
if (typeof query === 'string' || query instanceof String) {
this.trail = null;
let word = query.trim().toLowerCase();
return this.seekVanguard(word).then(([kdx, idx, list]) => {
list = list.slice(idx);
if (offset)
list = this.matchOffset(list, offset);
return list;
});
}
else {
return this.matchKeys(query.word, query.max, query.follow);
}
}
mdd(query) {
let word = query.trim().toLowerCase();
word = '\\' + word.replace(/(^[/\\])|([/]$)/, '');
word = word.replace(/\//g, '\\');
return this.seekVanguard(word).then(([, , list]) => {
// TODO: check why seekVanguard return error value
return list.filter(e => e.word.toLowerCase() === word);
}).then(candidates => {
if (candidates.length === 0) {
throw '*RESOURCE NOT FOUND* ' + query;
}
else {
return this.findResource(candidates[0]);
}
});
}
getWordList(query, offset) {
return this.ext === 'mdx' ? this.mdx(query, offset) : this.mdd(query);
}
getDefinition(offset) {
let block = this.RECORD_BLOCK_TABLE.find(offset);
return this.read(block.comp_offset, block.comp_size).then((data) => {
return this.readDefinition(data, block, offset).trim();
}).then((definition) => {
if (this.StyleSheet.length)
definition = util_1.parseRes(definition, this.StyleSheet);
return this.redirects(definition);
});
}
}
// Named export: fills in the `exports.Mdict` slot declared at the top of the file.
exports.Mdict = Mdict;
// Replaces the module's export value with the class itself, so `require()`
// of this file yields Mdict directly.
// NOTE(review): after this reassignment the original exports object (which
// carries `__esModule` and the named `Mdict` property) is no longer what
// `require()` returns, so `require(...).Mdict` would be undefined — confirm
// that only the default-style import is meant to be supported.
module.exports = Mdict;