kusamoji
Version:
Japanese morphological analyzer for Node.js — Viterbi tokenizer with mmap dict loading and pluggable POS-source strategy
52 lines (45 loc) • 2.05 kB
JavaScript
;
let CharacterDefinition = require("../CharacterDefinition");
let InvokeDefinitionMap = require("../InvokeDefinitionMap");
// Line patterns used to classify each line of a character definition file.
// None of them carries the `g`/`y` flag, so `exec` keeps no state between calls.

// Category definition: a word followed by three whitespace-separated single digits.
// Captures: [1] the word, [2]..[4] the three digits (as strings).
const CATEGORY_DEF_PATTERN = /^(\w+)\s+(\d)\s+(\d)\s+(\d)/;

// Single code point mapping: `0xHHHH` (four uppercase hex digits) followed by one
// or more whitespace-separated tokens; a token stops at whitespace or `#`.
// Captures: [1] the code point, [2] the first token, [3] the LAST additional token
// only (a repeated capture group retains just its final repetition), or
// undefined when there is no additional token.
const CATEGORY_MAPPING_PATTERN = /^(0x[0-9A-F]{4})(?:\s+([^#\s]+))(?:\s+([^#\s]+))*/;

// Code point range mapping: `0xHHHH..0xHHHH` followed by tokens as above.
// Captures: [1] range start, [2] range end, [3] the first token, [4] the last
// additional token or undefined.
const RANGE_CATEGORY_MAPPING_PATTERN = /^(0x[0-9A-F]{4})\.\.(0x[0-9A-F]{4})(?:\s+([^#\s]+))(?:\s+([^#\s]+))*/;
/**
 * CharacterDefinitionBuilder
 *
 * Collects parsed character category definitions and category mappings,
 * one input line at a time (see putLine), and assembles them into a
 * CharacterDefinition (see build).
 *
 * A new builder starts with:
 *  - char_def: a fresh CharacterDefinition carrying a fresh InvokeDefinitionMap
 *  - character_category_definition: empty list of parsed category definitions
 *  - category_mapping: empty list of parsed (single and range) mappings
 *
 * @constructor
 */
function CharacterDefinitionBuilder() {
    const definition = new CharacterDefinition();
    definition.invoke_definition_map = new InvokeDefinitionMap();

    this.char_def = definition;
    this.character_category_definition = [];
    this.category_mapping = [];
}
/**
 * Consume one line of a character definition file.
 *
 * The line is classified with the module-level patterns:
 *  - a category definition line is parsed and appended to
 *    character_category_definition; the id handed to the parser is the
 *    index the new entry will occupy in that list;
 *  - a single code point mapping or a code point range mapping is parsed
 *    and appended to category_mapping.
 * Lines matching none of the patterns are ignored.
 *
 * @param {string} line One line of the definition file
 */
CharacterDefinitionBuilder.prototype.putLine = function (line) {
    const categoryDefMatch = CATEGORY_DEF_PATTERN.exec(line);
    if (categoryDefMatch !== null) {
        const nextClassId = this.character_category_definition.length;
        const charClass = CharacterDefinition.parseCharCategory(nextClassId, categoryDefMatch);
        // A null/undefined parse result is dropped without recording anything.
        if (charClass != null) {
            this.character_category_definition.push(charClass);
        }
        // A category definition line is never treated as a mapping line.
        return;
    }

    const singleMappingMatch = CATEGORY_MAPPING_PATTERN.exec(line);
    if (singleMappingMatch !== null) {
        const singleMapping = CharacterDefinition.parseCategoryMapping(singleMappingMatch);
        this.category_mapping.push(singleMapping);
    }

    // Checked independently of the single-mapping test above.
    const rangeMappingMatch = RANGE_CATEGORY_MAPPING_PATTERN.exec(line);
    if (rangeMappingMatch !== null) {
        const rangeMapping = CharacterDefinition.parseRangeCategoryMapping(rangeMappingMatch);
        this.category_mapping.push(rangeMapping);
    }
};
/**
 * Finish building: initialise the invoke definition map from the collected
 * category definitions, then hand the collected mappings to the character
 * definition.
 *
 * @returns {CharacterDefinition} The builder's char_def instance (the same
 *     object on every call, not a copy)
 */
CharacterDefinitionBuilder.prototype.build = function () {
    // TODO If DEFAULT category does not exist, throw error
    const definition = this.char_def;

    // Order preserved from the original: the invoke map is initialised
    // before the mappings are applied.
    definition.invoke_definition_map.init(this.character_category_definition);
    definition.initCategoryMappings(this.category_mapping);

    return definition;
};
// The builder constructor is this module's sole export.
module.exports = CharacterDefinitionBuilder;