UNPKG

kuromoji

Version:

JavaScript implementation of Japanese morphological analyzer

103 lines (90 loc) 3.18 kB
/*
 * Copyright 2014 Takuya Asano
 * Copyright 2010-2014 Atilika Inc. and contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

"use strict";

/**
 * ViterbiSearcher finds the lowest-cost path through a Viterbi lattice.
 * @param {ConnectionCosts} connection_costs Connection costs matrix; only its
 *        `get(right_id, left_id)` method is used here
 * @constructor
 */
function ViterbiSearcher(connection_costs) {
    this.connection_costs = connection_costs;
}

/**
 * Search the best path: a forward pass that fills in the cheapest cost and
 * predecessor of every node, followed by a backward pass that walks the
 * predecessor links from the end of the lattice.
 * @param {ViterbiLattice} lattice Viterbi lattice to search (mutated in place)
 * @returns {Array} Nodes on the shortest path in text order, without the
 *          BOS/EOS nodes; an empty array when no complete path exists
 */
ViterbiSearcher.prototype.search = function (lattice) {
    lattice = this.forward(lattice);
    return this.backward(lattice);
};

/**
 * Forward pass. For every node ending at positions 1..lattice.eos_pos, pick
 * the predecessor (a node ending at `node.start_pos - 1`) that minimizes
 * `prev.shortest_cost + connection cost + node.cost`, then store the result
 * in `node.prev` and `node.shortest_cost`.
 *
 * Nodes whose predecessor slot is missing are left untouched. Nodes whose
 * predecessors cannot beat Number.MAX_VALUE get `prev = null` and
 * `shortest_cost = Number.MAX_VALUE`.
 * @param {ViterbiLattice} lattice Lattice to annotate (mutated in place)
 * @returns {ViterbiLattice} The same lattice object
 */
ViterbiSearcher.prototype.forward = function (lattice) {
    var i, j, k;
    for (i = 1; i <= lattice.eos_pos; i++) {
        var nodes = lattice.nodes_end_at[i];
        if (nodes == null) {
            continue;
        }
        for (j = 0; j < nodes.length; j++) {
            var node = nodes[j];
            var cost = Number.MAX_VALUE;
            // `var` is function-scoped, so this must be reset explicitly for
            // each node; otherwise a node without any usable predecessor
            // would inherit the predecessor chosen for the previous node.
            var shortest_prev_node = null;

            var prev_nodes = lattice.nodes_end_at[node.start_pos - 1];
            if (prev_nodes == null) {
                // TODO process unknown words (repair word lattice)
                continue;
            }
            for (k = 0; k < prev_nodes.length; k++) {
                var prev_node = prev_nodes[k];

                var edge_cost;
                if (node.left_id == null || prev_node.right_id == null) {
                    // TODO assert
                    console.log("Left or right is null");
                    edge_cost = 0;
                } else {
                    edge_cost = this.connection_costs.get(prev_node.right_id, node.left_id);
                }

                var _cost = prev_node.shortest_cost + edge_cost + node.cost;
                // Strict comparison: on ties the earliest predecessor wins.
                if (_cost < cost) {
                    shortest_prev_node = prev_node;
                    cost = _cost;
                }
            }

            node.prev = shortest_prev_node;
            node.shortest_cost = cost;
        }
    }
    return lattice;
};

/**
 * Backward pass. Takes the first node in the last slot of
 * `lattice.nodes_end_at` as the end-of-sentence node and follows `prev`
 * links until a node of type "BOS" is reached.
 * @param {ViterbiLattice} lattice Lattice already processed by `forward`
 * @returns {Array} Nodes between BOS and EOS in text order; an empty array
 *          when the chain of `prev` links is broken before reaching BOS
 */
ViterbiSearcher.prototype.backward = function (lattice) {
    var shortest_path = [];
    var eos = lattice.nodes_end_at[lattice.nodes_end_at.length - 1][0];

    var node_back = eos.prev;
    if (node_back == null) {
        return [];
    }
    while (node_back.type !== "BOS") {
        shortest_path.push(node_back);
        if (node_back.prev == null) {
            // TODO Failed to back. Process unknown words?
            return [];
        }
        node_back = node_back.prev;
    }

    // Nodes were collected from the end of the sentence towards the start.
    return shortest_path.reverse();
};

module.exports = ViterbiSearcher;