UNPKG

kuromoji

Version:

JavaScript implementation of Japanese morphological analyzer

138 lines (126 loc) 5.08 kB
/* * Copyright 2014 Takuya Asano * Copyright 2010-2014 Atilika Inc. and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ "use strict"; var path = require("path"); var async = require("async"); var DynamicDictionaries = require("../dict/DynamicDictionaries"); /** * DictionaryLoader base constructor * @param {string} dic_path Dictionary path * @constructor */ function DictionaryLoader(dic_path) { this.dic = new DynamicDictionaries(); this.dic_path = dic_path; } DictionaryLoader.prototype.loadArrayBuffer = function (file, callback) { throw new Error("DictionaryLoader#loadArrayBuffer should be overwrite"); }; /** * Load dictionary files * @param {DictionaryLoader~onLoad} load_callback Callback function called after loaded */ DictionaryLoader.prototype.load = function (load_callback) { var dic = this.dic; var dic_path = this.dic_path; var loadArrayBuffer = this.loadArrayBuffer; async.parallel([ // Trie function (callback) { async.map([ "base.dat.gz", "check.dat.gz" ], function (filename, _callback) { loadArrayBuffer(path.join(dic_path, filename), function (err, buffer) { if(err) { return _callback(err); } _callback(null, buffer); }); }, function (err, buffers) { if(err) { return callback(err); } var base_buffer = new Int32Array(buffers[0]); var check_buffer = new Int32Array(buffers[1]); dic.loadTrie(base_buffer, check_buffer); callback(null); }); }, // Token info dictionaries function (callback) { async.map([ "tid.dat.gz", "tid_pos.dat.gz", "tid_map.dat.gz" ], function (filename, _callback) { loadArrayBuffer(path.join(dic_path, filename), function (err, buffer) { if(err) { return _callback(err); } _callback(null, buffer); }); }, function (err, buffers) { if(err) { return callback(err); } var token_info_buffer = new Uint8Array(buffers[0]); var pos_buffer = new Uint8Array(buffers[1]); var target_map_buffer = new Uint8Array(buffers[2]); dic.loadTokenInfoDictionaries(token_info_buffer, pos_buffer, target_map_buffer); callback(null); }); }, // Connection cost matrix function (callback) { loadArrayBuffer(path.join(dic_path, "cc.dat.gz"), function (err, buffer) { if(err) { return callback(err); } var cc_buffer = new Int16Array(buffer); dic.loadConnectionCosts(cc_buffer); callback(null); }); }, // Unknown dictionaries function (callback) { async.map([ "unk.dat.gz", "unk_pos.dat.gz", "unk_map.dat.gz", "unk_char.dat.gz", "unk_compat.dat.gz", "unk_invoke.dat.gz" ], function (filename, _callback) { loadArrayBuffer(path.join(dic_path, filename), function (err, buffer) { if(err) { return _callback(err); } _callback(null, buffer); }); }, function (err, buffers) { if(err) { return callback(err); } var unk_buffer = new Uint8Array(buffers[0]); var unk_pos_buffer = new Uint8Array(buffers[1]); var unk_map_buffer = new Uint8Array(buffers[2]); var cat_map_buffer = new Uint8Array(buffers[3]); var compat_cat_map_buffer = new Uint32Array(buffers[4]); var invoke_def_buffer = new Uint8Array(buffers[5]); dic.loadUnknownDictionaries(unk_buffer, unk_pos_buffer, unk_map_buffer, cat_map_buffer, compat_cat_map_buffer, invoke_def_buffer); // dic.loadUnknownDictionaries(char_buffer, unk_buffer); callback(null); }); } ], function (err) { load_callback(err, dic); }); }; /** * Callback * @callback DictionaryLoader~onLoad * @param {Object} err Error object * @param {DynamicDictionaries} dic Loaded dictionary */ module.exports = DictionaryLoader;