kuroshiro-analyzer-mecab
Version:
mecab morphological analyzer for kuroshiro
104 lines (99 loc) • 3.78 kB
JavaScript
// Check where we are
import Mecab from "mecab-async";
/**
 * Mecab Analyzer
 *
 * Wraps a `mecab-async` instance behind an `init()` / `parse()` pair that both
 * return Promises. `init()` must be called (once) before `parse()` is used on
 * non-empty input.
 */
class Analyzer {
    /**
     * Constructor
     * @param {Object} [options] Analyzer options.
     * @param {string} [options.command] mecab command. If set, the param `dictPath` is ignored.
     * @param {string} [options.dictPath] Path of the dictionary mecab used.
     * @param {Object} [options.execOptions] The exec options to run mecab command.
     * @param {Number} [options.execOptions.maxBuffer] Largest amount of data in bytes allowed on stdout or stderr. see https://nodejs.org/api/child_process.html#child_process_child_process_exec_command_options_callback.
     * @param {Number} [options.execOptions.timeout] Timeout. see https://nodejs.org/api/child_process.html#child_process_child_process_exec_command_options_callback.
     */
    constructor({ command, dictPath, execOptions } = {}) {
        // The underlying Mecab instance is created lazily by init().
        this._analyzer = null;
        this._execOptions = execOptions || {};

        // Precedence: explicit command > dictionary path > plain "mecab".
        if (command) {
            this._command = command;
        }
        else if (dictPath) {
            // NOTE: dictPath is interpolated verbatim (no quoting or escaping),
            // so a path containing spaces must be quoted by the caller or
            // passed through `command` instead.
            this._command = `mecab -d ${dictPath}`;
        }
        else {
            this._command = "mecab";
        }
    }

    /**
     * Initialize the analyzer
     *
     * Creates the Mecab instance and configures its command, exec options and
     * the parser that maps each raw mecab record (an indexable list of fields)
     * onto a named token object.
     *
     * @returns {Promise} Promise object represents the result of initialization.
     * Rejects with an Error if the analyzer has already been initialized.
     */
    init() {
        return new Promise((resolve, reject) => {
            if (this._analyzer != null) {
                reject(new Error("This analyzer has already been initialized."));
                return;
            }

            const mecab = new Mecab();
            mecab.command = this._command;
            mecab.options = this._execOptions;
            // Field order follows the positions of the record handed to the parser.
            mecab.parser = data => ({
                surface_form: data[0],
                pos: data[1],
                pos_detail_1: data[2],
                pos_detail_2: data[3],
                pos_detail_3: data[4],
                conjugated_type: data[5],
                conjugated_form: data[6],
                basic_form: data[7],
                reading: data[8],
                pronunciation: data[9]
            });
            this._analyzer = mecab;
            resolve();
        });
    }

    /**
     * Parse the given string
     *
     * The input is split on single ASCII spaces, every non-empty piece is
     * analyzed separately, and the per-piece token lists are joined back
     * together with one synthetic whitespace token per original space. Empty
     * pieces (leading, trailing or consecutive spaces) contribute no tokens of
     * their own, so only their whitespace tokens appear in the result.
     *
     * The synthetic whitespace token carries no `reading` or `pronunciation`
     * field.
     *
     * @param {string} [str=""] input string.
     * @returns {Promise} Promise object represents the result of parsing
     * (an array of token objects). Rejects if the analyzer has not been
     * initialized and there is something to analyze, if mecab reports an
     * error, or if `str` cannot be split.
     */
    parse(str = "") {
        // A fresh object per separator: tokens must not be shared between positions.
        const createSpaceToken = () => ({
            surface_form: " ",
            pos: "記号",
            pos_detail_1: "空白",
            pos_detail_2: "*",
            pos_detail_3: "*",
            conjugated_type: "*",
            conjugated_form: "*",
            basic_form: "*"
        });

        // Adapts the callback-style parseFormat API to a Promise.
        const parseToken = token => new Promise((resolve, reject) => {
            if (token === "") {
                resolve([]);
                return;
            }
            if (this._analyzer == null) {
                // Fail with a clear message instead of a TypeError on `null`.
                reject(new Error("This analyzer has not been initialized. Call init() before parse()."));
                return;
            }
            this._analyzer.parseFormat(token, (err, result) => {
                if (err) {
                    reject(err);
                    return;
                }
                resolve(result);
            });
        });

        // The executor keeps the split synchronous while still turning a
        // failure of `str.split` into a rejection rather than a throw.
        return new Promise((resolve) => {
            resolve(Promise.all(str.split(" ").map(parseToken)));
        }).then((segments) => {
            let tokens = [];
            segments.forEach((segment, index) => {
                if (index > 0) {
                    tokens.push(createSpaceToken());
                }
                tokens = tokens.concat(segment);
            });
            return tokens;
        });
    }
}
export default Analyzer;