UNPKG

kusamoji

Version:

Japanese morphological analyzer for Node.js — Viterbi tokenizer with mmap dict loading and pluggable POS-source strategy

52 lines (45 loc) 2.05 kB
"use strict"; let CharacterDefinition = require("../CharacterDefinition"); let InvokeDefinitionMap = require("../InvokeDefinitionMap"); let CATEGORY_DEF_PATTERN = /^(\w+)\s+(\d)\s+(\d)\s+(\d)/; let CATEGORY_MAPPING_PATTERN = /^(0x[0-9A-F]{4})(?:\s+([^#\s]+))(?:\s+([^#\s]+))*/; let RANGE_CATEGORY_MAPPING_PATTERN = /^(0x[0-9A-F]{4})\.\.(0x[0-9A-F]{4})(?:\s+([^#\s]+))(?:\s+([^#\s]+))*/; /** * CharacterDefinitionBuilder * @constructor */ function CharacterDefinitionBuilder() { this.char_def = new CharacterDefinition(); this.char_def.invoke_definition_map = new InvokeDefinitionMap(); this.character_category_definition = []; this.category_mapping = []; } CharacterDefinitionBuilder.prototype.putLine = function (line) { let parsed_category_def = CATEGORY_DEF_PATTERN.exec(line); if (parsed_category_def != null) { let class_id = this.character_category_definition.length; let char_class = CharacterDefinition.parseCharCategory(class_id, parsed_category_def); if (char_class == null) { return; } this.character_category_definition.push(char_class); return; } let parsed_category_mapping = CATEGORY_MAPPING_PATTERN.exec(line); if (parsed_category_mapping != null) { let mapping = CharacterDefinition.parseCategoryMapping(parsed_category_mapping); this.category_mapping.push(mapping); } let parsed_range_category_mapping = RANGE_CATEGORY_MAPPING_PATTERN.exec(line); if (parsed_range_category_mapping != null) { let range_mapping = CharacterDefinition.parseRangeCategoryMapping(parsed_range_category_mapping); this.category_mapping.push(range_mapping); } }; CharacterDefinitionBuilder.prototype.build = function () { // TODO If DEFAULT category does not exist, throw error this.char_def.invoke_definition_map.init(this.character_category_definition); this.char_def.initCategoryMappings(this.category_mapping); return this.char_def; }; module.exports = CharacterDefinitionBuilder;