// kuromoji
// Version:
// JavaScript implementation of Japanese morphological analyzer
// 103 lines (90 loc) • 3.18 kB
// JavaScript
/*
* Copyright 2014 Takuya Asano
* Copyright 2010-2014 Atilika Inc. and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
;
/**
 * ViterbiSearcher finds the lowest-cost path through a Viterbi lattice.
 * @param {ConnectionCosts} connection_costs Connection costs matrix; this
 *     class only calls its get(right_id, left_id) method
 * @constructor
 */
function ViterbiSearcher(connection_costs) {
    this.connection_costs = connection_costs;
}

/**
 * Search the best path: the forward pass records the cheapest predecessor of
 * each node, then the backward pass follows those links back from the last
 * lattice slot to the BOS node.
 * @param {ViterbiLattice} lattice Viterbi lattice to search (updated in place)
 * @returns {Array} Nodes of the shortest path in reading order, without the
 *     BOS/EOS nodes; empty when no complete path exists
 */
ViterbiSearcher.prototype.search = function (lattice) {
    return this.backward(this.forward(lattice));
};

/**
 * Forward pass. Visits nodes in increasing end position (1..eos_pos) and, for
 * each node, stores the cheapest predecessor in node.prev and the accumulated
 * cost of reaching the node in node.shortest_cost.
 * @param {ViterbiLattice} lattice Lattice exposing eos_pos and nodes_end_at
 * @returns {ViterbiLattice} The same lattice object, with its nodes updated
 */
ViterbiSearcher.prototype.forward = function (lattice) {
    var i, j, k;
    var nodes, node, prev_nodes, prev_node;
    var cost, shortest_prev_node, edge_cost, candidate_cost;

    for (i = 1; i <= lattice.eos_pos; i++) {
        nodes = lattice.nodes_end_at[i];
        if (nodes == null) {
            continue;
        }
        for (j = 0; j < nodes.length; j++) {
            node = nodes[j];
            prev_nodes = lattice.nodes_end_at[node.start_pos - 1];
            if (prev_nodes == null) {
                // TODO process unknown words (repair word lattice)
                continue;
            }

            // Both values must be reset for every node. If the best
            // predecessor were carried over from the previous node, a node
            // whose predecessors never beat Number.MAX_VALUE (empty list or
            // only unreachable predecessors) would be linked to an unrelated
            // node and the back-trace could return a bogus path.
            cost = Number.MAX_VALUE;
            shortest_prev_node = null;

            for (k = 0; k < prev_nodes.length; k++) {
                prev_node = prev_nodes[k];
                if (node.left_id == null || prev_node.right_id == null) {
                    // TODO assert
                    console.log("Left or right is null");
                    edge_cost = 0;
                } else {
                    edge_cost = this.connection_costs.get(prev_node.right_id, node.left_id);
                }
                candidate_cost = prev_node.shortest_cost + edge_cost + node.cost;
                // Strict comparison: on ties the earliest predecessor wins.
                if (candidate_cost < cost) {
                    shortest_prev_node = prev_node;
                    cost = candidate_cost;
                }
            }
            node.prev = shortest_prev_node;
            node.shortest_cost = cost;
        }
    }
    return lattice;
};

/**
 * Backward pass. Starts from the first node stored in the last slot of
 * lattice.nodes_end_at (expected to be the EOS node) and follows the prev
 * links until a node whose type is "BOS" is reached.
 * @param {ViterbiLattice} lattice Lattice already processed by forward()
 * @returns {Array} Path nodes in reading order (BOS/EOS excluded), or an
 *     empty array when the last slot is missing/empty or the chain of prev
 *     links breaks before reaching BOS
 */
ViterbiSearcher.prototype.backward = function (lattice) {
    var end_nodes = lattice.nodes_end_at[lattice.nodes_end_at.length - 1];
    if (end_nodes == null || end_nodes.length === 0) {
        // Nothing to trace back from; report "no path" like the other
        // failure cases instead of throwing a TypeError.
        return [];
    }

    var shortest_path = [];
    var node_back = end_nodes[0].prev;
    if (node_back == null) {
        return [];
    }
    while (node_back.type !== "BOS") {
        shortest_path.push(node_back);
        if (node_back.prev == null) {
            // TODO Failed to back. Process unknown words?
            return [];
        }
        node_back = node_back.prev;
    }
    // Nodes were collected from the end of the sentence towards the start.
    return shortest_path.reverse();
};
// The ViterbiSearcher constructor is this module's only export (CommonJS).
module.exports = ViterbiSearcher;