kusamoji
Version:
Japanese morphological analyzer for Node.js — Viterbi tokenizer with mmap dict loading and pluggable POS-source strategy
108 lines (96 loc) • 3.18 kB
JavaScript
;
let path = require("path");
let DynamicDictionaries = require("../dict/DynamicDictionaries");
/**
 * Base class for dictionary loaders.
 *
 * Creates an empty DynamicDictionaries container and remembers the directory
 * that dictionary file names are resolved against.
 * @param {string} dic_path Directory containing the dictionary files
 * @constructor
 */
function DictionaryLoader(dic_path) {
  const dictionaries = new DynamicDictionaries();

  this.dic = dictionaries;
  this.dic_path = dic_path;
}
/**
 * Abstract hook: fetch one dictionary file and hand its contents to `callback`.
 *
 * The base implementation always throws; concrete loaders must replace it.
 * @param {string} file Full path to the file
 * @param {function} callback Node-style callback, invoked as (err, ArrayBuffer)
 * @throws {Error} Always, in this base implementation
 */
DictionaryLoader.prototype.loadArrayBuffer = function (file, callback) {
  const notOverridden = new Error("DictionaryLoader#loadArrayBuffer should be overwritten");
  throw notOverridden;
};
/**
 * Promise adapter over the callback-based `loadArrayBuffer`.
 *
 * The file name is joined onto `dic_path` and the resulting path is passed to
 * `loadArrayBuffer`. Path joining and the `loadArrayBuffer` call both happen
 * inside the Promise executor, so a synchronous throw from either one surfaces
 * as a rejection rather than an exception.
 * @param {string} filename File name relative to `dic_path`
 * @returns {Promise} Resolves with the buffer passed to the callback, rejects with its error
 */
DictionaryLoader.prototype._loadFile = function (filename) {
  return new Promise((resolve, reject) => {
    const onLoaded = (err, buffer) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(buffer);
    };

    this.loadArrayBuffer(path.join(this.dic_path, filename), onLoaded);
  });
};
/**
 * Load every dictionary file and feed the contents into `this.dic`.
 *
 * All files are requested up front through `_loadFile`, in four groups, and
 * each group is handed to the matching `this.dic` loader as soon as its own
 * files have arrived:
 *   - trie: base.dat, check.dat (viewed as Int32Array)
 *   - token info: tid.dat, tid_pos.dat, tid_map.dat (viewed as Uint8Array)
 *   - connection costs: cc.dat (viewed as Int16Array)
 *   - unknown-word dictionaries: unk*.dat (Uint8Array, except unk_compat.dat
 *     which is viewed as Uint32Array)
 *
 * `load_callback` is invoked exactly once: with `(null, dic)` when every group
 * has loaded, or with `(err, null)` on the first failure. An exception thrown
 * by `load_callback` itself is NOT fed back into `load_callback`; it surfaces
 * as an unhandled promise rejection instead.
 * @param {DictionaryLoader~onLoad} load_callback Callback function called after loaded
 */
DictionaryLoader.prototype.load = function (load_callback) {
  const dic = this.dic;

  // Request several files at once; resolves with their buffers in the same order.
  const fetchAll = (filenames) =>
    Promise.all(filenames.map((filename) => this._loadFile(filename)));

  // Trie (base + check)
  const trie = fetchAll(["base.dat", "check.dat"]).then(([base, check]) => {
    dic.loadTrie(new Int32Array(base), new Int32Array(check));
  });

  // Token info dictionaries
  const tokenInfo = fetchAll(["tid.dat", "tid_pos.dat", "tid_map.dat"]).then(
    ([tid, tidPos, tidMap]) => {
      dic.loadTokenInfoDictionaries(
        new Uint8Array(tid),
        new Uint8Array(tidPos),
        new Uint8Array(tidMap)
      );
    }
  );

  // Connection cost matrix
  const connectionCosts = this._loadFile("cc.dat").then((buffer) => {
    dic.loadConnectionCosts(new Int16Array(buffer));
  });

  // Unknown dictionaries
  const unknown = fetchAll([
    "unk.dat",
    "unk_pos.dat",
    "unk_map.dat",
    "unk_char.dat",
    "unk_compat.dat",
    "unk_invoke.dat",
  ]).then(([unk, unkPos, unkMap, unkChar, unkCompat, unkInvoke]) => {
    dic.loadUnknownDictionaries(
      new Uint8Array(unk),
      new Uint8Array(unkPos),
      new Uint8Array(unkMap),
      new Uint8Array(unkChar),
      new Uint32Array(unkCompat),
      new Uint8Array(unkInvoke)
    );
  });

  // Two-argument then(): the rejection handler only sees loading failures.
  // With `.then(ok).catch(fail)` a throw inside the success callback would
  // reach `fail` and invoke load_callback a second time.
  Promise.all([trie, tokenInfo, connectionCosts, unknown]).then(
    () => {
      load_callback(null, dic);
    },
    (err) => {
      load_callback(err, null);
    }
  );
};
/**
 * Completion callback accepted by DictionaryLoader#load.
 * @callback DictionaryLoader~onLoad
 * @param {Object} err Reason the load failed, or null when loading succeeded
 * @param {DynamicDictionaries} dic Loaded dictionary, or null when loading failed
 */
// The constructor is this module's only export.
module.exports = DictionaryLoader;