kusamoji
Version:
Japanese morphological analyzer for Node.js — Viterbi tokenizer with mmap dict loading and pluggable POS-source strategy
42 lines (33 loc) • 1.49 kB
JavaScript
;
let TokenInfoDictionary = require("./TokenInfoDictionary");
let CharacterDefinition = require("./CharacterDefinition");
let ByteBuffer = require("../util/ByteBuffer");
/**
 * UnknownDictionary
 *
 * Sets up the instance state: two separate ByteBuffer instances
 * (`dictionary` and `pos_buffer`), an empty `target_map`, and a
 * `character_definition` slot that is null until characterDefinition()
 * or loadUnknownDictionaries() assigns it.
 * @constructor
 */
function UnknownDictionary() {
    // Value handed to each ByteBuffer constructor (presumably an initial
    // capacity in bytes, i.e. 10 MiB; confirm against ByteBuffer).
    const initialBufferSize = 10 * 1024 * 1024;

    this.dictionary = new ByteBuffer(initialBufferSize);
    // class_id (of CharacterClass) -> token_info_id (of unknown class)
    this.target_map = {};
    this.pos_buffer = new ByteBuffer(initialBufferSize);
    this.character_definition = null;
}
// Inherit from TokenInfoDictionary as a super class.
// Object.create() returns a fresh object with no own `constructor`, so without
// the descriptor below `instance.constructor` would resolve through the chain
// to TokenInfoDictionary. Restore it explicitly, non-enumerable like the
// default `constructor` property of a function's prototype.
UnknownDictionary.prototype = Object.create(TokenInfoDictionary.prototype, {
    constructor: {
        value: UnknownDictionary,
        enumerable: false,
        writable: true,
        configurable: true
    }
});
/**
 * Store the character definition that lookup() and
 * lookupCompatibleCategory() delegate to.
 * @param {Object} character_definition Value assigned to this.character_definition as-is
 * @returns {UnknownDictionary} The receiver, so calls can be chained
 */
UnknownDictionary.prototype.characterDefinition = function (character_definition) {
    const self = this;
    self.character_definition = character_definition;
    return self;
};
/**
 * Delegate a lookup to the attached character definition.
 *
 * Throws a TypeError if no character definition has been set yet
 * (this.character_definition is still null).
 * @param {*} ch Forwarded unchanged to character_definition.lookup (presumably a single character; confirm against CharacterDefinition)
 * @returns {*} Whatever character_definition.lookup(ch) returns
 */
UnknownDictionary.prototype.lookup = function (ch) {
    const definition = this.character_definition;
    return definition.lookup(ch);
};
/**
 * Delegate a compatible-category lookup to the attached character definition.
 *
 * Throws a TypeError if no character definition has been set yet
 * (this.character_definition is still null).
 * @param {*} ch Forwarded unchanged to character_definition.lookupCompatibleCategory (presumably a single character; confirm against CharacterDefinition)
 * @returns {*} Whatever character_definition.lookupCompatibleCategory(ch) returns
 */
UnknownDictionary.prototype.lookupCompatibleCategory = function (ch) {
    const definition = this.character_definition;
    return definition.lookupCompatibleCategory(ch);
};
/**
 * Populate this dictionary from six buffers.
 *
 * The first three buffers go, in order, to loadDictionary, loadPosVector and
 * loadTargetMap (none of them defined in this file; presumably inherited from
 * TokenInfoDictionary). The last three are passed to CharacterDefinition.load,
 * whose result becomes this.character_definition.
 * @param {*} unk_buffer Passed to this.loadDictionary
 * @param {*} unk_pos_buffer Passed to this.loadPosVector
 * @param {*} unk_map_buffer Passed to this.loadTargetMap
 * @param {*} cat_map_buffer First argument to CharacterDefinition.load
 * @param {*} compat_cat_map_buffer Second argument to CharacterDefinition.load
 * @param {*} invoke_def_buffer Third argument to CharacterDefinition.load
 * @returns {undefined} Nothing is returned; the instance is updated in place
 */
UnknownDictionary.prototype.loadUnknownDictionaries = function (
    unk_buffer,
    unk_pos_buffer,
    unk_map_buffer,
    cat_map_buffer,
    compat_cat_map_buffer,
    invoke_def_buffer
) {
    // Token-info side of the dictionary.
    this.loadDictionary(unk_buffer);
    this.loadPosVector(unk_pos_buffer);
    this.loadTargetMap(unk_map_buffer);

    // Character-definition side, built from the remaining three buffers.
    const loadedDefinition = CharacterDefinition.load(
        cat_map_buffer,
        compat_cat_map_buffer,
        invoke_def_buffer
    );
    this.character_definition = loadedDefinition;
};
module.exports = UnknownDictionary;