UNPKG

kusamoji

Version:

Japanese morphological analyzer for Node.js — Viterbi tokenizer with mmap dict loading and pluggable POS-source strategy

108 lines (96 loc) 3.18 kB
"use strict"; let path = require("path"); let DynamicDictionaries = require("../dict/DynamicDictionaries"); /** * DictionaryLoader base constructor * @param {string} dic_path Dictionary path * @constructor */ function DictionaryLoader(dic_path) { this.dic = new DynamicDictionaries(); this.dic_path = dic_path; } /** * Load a single .dat file from disk — subclasses override this. * @param {string} file Full path to the file * @param {function} callback (err, ArrayBuffer) */ DictionaryLoader.prototype.loadArrayBuffer = function (file, callback) { throw new Error("DictionaryLoader#loadArrayBuffer should be overwritten"); }; /** * Promisified wrapper around loadArrayBuffer. */ DictionaryLoader.prototype._loadFile = function (filename) { let self = this; return new Promise(function (resolve, reject) { self.loadArrayBuffer(path.join(self.dic_path, filename), function (err, buffer) { if (err) reject(err); else resolve(buffer); }); }); }; /** * Load all dictionary files in parallel using native Promise.all. * @param {DictionaryLoader~onLoad} load_callback Callback function called after loaded */ DictionaryLoader.prototype.load = function (load_callback) { let dic = this.dic; let self = this; Promise.all([ // Trie (base + check) Promise.all([ self._loadFile("base.dat"), self._loadFile("check.dat"), ]).then(function (buffers) { dic.loadTrie(new Int32Array(buffers[0]), new Int32Array(buffers[1])); }), // Token info dictionaries Promise.all([ self._loadFile("tid.dat"), self._loadFile("tid_pos.dat"), self._loadFile("tid_map.dat"), ]).then(function (buffers) { dic.loadTokenInfoDictionaries( new Uint8Array(buffers[0]), new Uint8Array(buffers[1]), new Uint8Array(buffers[2]) ); }), // Connection cost matrix self._loadFile("cc.dat").then(function (buffer) { dic.loadConnectionCosts(new Int16Array(buffer)); }), // Unknown dictionaries Promise.all([ self._loadFile("unk.dat"), self._loadFile("unk_pos.dat"), self._loadFile("unk_map.dat"), self._loadFile("unk_char.dat"), self._loadFile("unk_compat.dat"), self._loadFile("unk_invoke.dat"), ]).then(function (buffers) { dic.loadUnknownDictionaries( new Uint8Array(buffers[0]), new Uint8Array(buffers[1]), new Uint8Array(buffers[2]), new Uint8Array(buffers[3]), new Uint32Array(buffers[4]), new Uint8Array(buffers[5]) ); }), ]) .then(function () { load_callback(null, dic); }) .catch(function (err) { load_callback(err, null); }); }; /** * Callback * @callback DictionaryLoader~onLoad * @param {Object} err Error object * @param {DynamicDictionaries} dic Loaded dictionary */ module.exports = DictionaryLoader;