kusamoji
Version:
Japanese morphological analyzer for Node.js — Viterbi tokenizer with mmap dict loading and pluggable POS-source strategy
71 lines (62 loc) • 2.09 kB
JavaScript
;
let DictionaryLoader = require("./DictionaryLoader");
// Resolve the optional native mmap binding once, at module load time.
// The native loader walks this resolution chain:
//   ~/.kusamoji/ cache → shipped prebuilts → local build → null (fallback)
// Whatever goes wrong on the way (loader module missing, addon not built,
// addon failing to load) leaves mmapFile as null, which makes dictionary
// loading take the fs.readFile route instead.
let mmapFile = (function resolveMmapFile() {
    try {
        const loadMmapAddon = require("../native/loader").loadMmapAddon;
        const binding = loadMmapAddon();
        return binding ? binding.mmapFile : null;
    } catch (e) {
        // Loader or addon not available — fall back to fs.readFile
        return null;
    }
})();
/**
 * NodeDictionaryLoader inherits DictionaryLoader.
 *
 * Uses mmap when the native addon is available (zero-copy, OS-managed
 * page cache). Falls back to fs.readFile if the addon is not built.
 *
 * @param {string} dic_path Dictionary path
 * @constructor
 */
function NodeDictionaryLoader(dic_path) {
    DictionaryLoader.call(this, dic_path);
}

// Inherit from DictionaryLoader. Replacing the prototype object discards the
// default `constructor` link, so it is restored here; it is defined
// non-enumerable (like the built-in one) so that for...in over instances
// does not pick it up.
NodeDictionaryLoader.prototype = Object.create(DictionaryLoader.prototype, {
    constructor: {
        value: NodeDictionaryLoader,
        writable: true,
        configurable: true,
        enumerable: false
    }
});
/**
 * Load an uncompressed dictionary .dat file as an ArrayBuffer.
 *
 * When the native mmap binding is available the file is mapped and the
 * callback is invoked synchronously, before this method returns. Otherwise
 * the file is read with fs.readFile and the callback is invoked
 * asynchronously.
 *
 * The callback is invoked exactly once. An exception thrown by the callback
 * itself propagates to the caller; it is never fed back into the same
 * callback as if it were a load error.
 *
 * @param {string} file Dictionary file path (ends in .dat)
 * @param {NodeDictionaryLoader~onLoad} callback Callback function
 */
NodeDictionaryLoader.prototype.loadArrayBuffer = function (file, callback) {
    if (mmapFile) {
        // mmap path: zero-copy, OS manages page faults and eviction
        let mapped;
        try {
            // Only the native call is guarded: keeping the callback outside
            // the try block prevents a throwing callback from being caught
            // here and invoked a second time.
            mapped = mmapFile(file);
        } catch (err) {
            callback(err);
            return;
        }
        if (!(mapped instanceof ArrayBuffer)) {
            callback(new Error("mmapFile returned invalid buffer for " + file));
            return;
        }
        callback(null, mapped);
        return;
    }

    // Fallback: read the entire file into memory
    const fs = require("fs");
    fs.readFile(file, function (err, data) {
        if (err) {
            callback(err);
            return;
        }
        // Constructing a Uint8Array from the Buffer copies its bytes, so the
        // resulting ArrayBuffer starts at offset 0 and spans exactly the
        // file contents regardless of how the Buffer itself is backed.
        callback(null, new Uint8Array(data).buffer);
    });
};
/**
 * @callback NodeDictionaryLoader~onLoad
 * @param {?Error} err Error object, or null when the file was loaded successfully
 * @param {ArrayBuffer} [buffer] Loaded file contents as an ArrayBuffer (not passed on error)
 */
module.exports = NodeDictionaryLoader;