UNPKG

@a-s8h/liblevenshtein

Version:

Various utilities regarding Levenshtein transducers.

799 lines (763 loc) 26 kB
// Generated by CoffeeScript 1.7.1
//
// Defines levenshtein.Builder, an immutable-style configuration object whose
// build() / transducer() method assembles the callbacks handed to
// levenshtein.Transducer. Positions handled below are small arrays: [i, e] for
// the 'standard' algorithm and [i, e, x] for 'transposition' and
// 'merge_and_split' (x is a 0/1 flag). A "state" is an array of positions.
(function() {
  var Builder, Dawg, MaxHeap, Transducer, def_properties, def_property, fields, global, identity, noop, property, value,
    __bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; },
    __hasProp = {}.hasOwnProperty,
    __slice = [].slice;

  // Export target: CommonJS `exports`, else the browser `window`, else `this`.
  global = typeof exports === 'object' ? exports : typeof window === 'object' ? window : this;

  global['levenshtein'] || (global['levenshtein'] = {});

  // Resolve collaborators either through require() or from the shared
  // `levenshtein` namespace (script-tag usage).
  if (typeof require === 'function') {
    MaxHeap = require('../collection/max-heap').levenshtein.MaxHeap;
    Transducer = require('./transducer').levenshtein.Transducer;
    Dawg = require('../collection/dawg').levenshtein.Dawg;
  } else {
    MaxHeap = global['levenshtein']['MaxHeap'];
    Transducer = global['levenshtein']['Transducer'];
    Dawg = global['levenshtein']['Dawg'];
  }

  // Default value of every configurable field. These are copied onto
  // Builder.prototype further down, so a fresh Builder reads them via the
  // prototype chain.
  fields = {
    '_dictionary': new Dawg([]),
    '_algorithm': 'standard',
    '_sort_candidates': true,
    '_case_insensitive_sort': true,
    '_include_distance': true,
    '_maximum_candidates': Infinity,
    '_custom_comparator': null,
    '_custom_transform': null,
    '_default_edit_distance': Infinity
  };

  Builder = (function() {
    /**
     * @param {Builder} [source] Builder whose fields are copied.
     * @param {Object} [attributes] Overrides keyed by public property name
     *   (without the leading underscore). Only applied when `source` is a
     *   Builder.
     */
    function Builder(source, attributes) {
      this._unsubsume = __bind(this._unsubsume, this);
      var attribute, field, value;
      if (source instanceof Builder) {
        for (field in fields) {
          if (!__hasProp.call(fields, field)) continue;
          this[field] = source[field];
        }
        for (attribute in attributes) {
          if (!__hasProp.call(attributes, attribute)) continue;
          value = attributes[attribute];
          this['_' + attribute] = value;
        }
      }
    }

    /**
     * Returns a function (state, w) that yields the smallest `w - i + e` over
     * the positions of `state` (Infinity for an empty state). For the
     * non-standard algorithms, positions whose flag equals 1 are ignored.
     */
    Builder.prototype._minimum_distance = function() {
      if (this['_algorithm'] === 'standard') {
        return function(state, w) {
          var distance, e, i, minimum, _i, _len, _ref;
          minimum = Infinity;
          for (_i = 0, _len = state.length; _i < _len; _i++) {
            _ref = state[_i], i = _ref[0], e = _ref[1];
            distance = w - i + e;
            if (distance < minimum) {
              minimum = distance;
            }
          }
          return minimum;
        };
      } else {
        return function(state, w) {
          var distance, e, i, minimum, x, _i, _len, _ref;
          minimum = Infinity;
          for (_i = 0, _len = state.length; _i < _len; _i++) {
            _ref = state[_i], i = _ref[0], e = _ref[1], x = _ref[2];
            distance = w - i + e;
            if (x !== 1 && distance < minimum) {
              minimum = distance;
            }
          }
          return minimum;
        };
      }
    };

    /**
     * Chooses the candidate comparator. Candidates are compared as
     * [term, distance] pairs.
     *  - a custom comparator, if one was configured, wins;
     *  - otherwise, when sorting is enabled: by distance, then by lower-cased
     *    term, and (only when case-insensitive sorting is off) finally by the
     *    raw term as a tie-breaker;
     *  - otherwise a comparator that treats everything as equal.
     */
    Builder.prototype._comparator = function() {
      var comparator;
      if (typeof this['_custom_comparator'] === 'function') {
        return this['_custom_comparator'];
      } else if (this['_sort_candidates']) {
        comparator = function(a, b) {
          return a[1] - b[1];
        };
        comparator = (function(comparator) {
          return function(a, b) {
            return comparator(a, b) || a[0].toLowerCase().localeCompare(b[0].toLowerCase());
          };
        })(comparator);
        if (!this['_case_insensitive_sort']) {
          comparator = (function(comparator) {
            return function(a, b) {
              return comparator(a, b) || a[0].localeCompare(b[0]);
            };
          })(comparator);
        }
        return comparator;
      } else {
        return function() {
          return 0;
        };
      }
    };

    /**
     * Returns the function that turns the accumulated matches container into
     * the array handed back to the caller.
     *
     * @param {Function} comparator Accepted for symmetry with _push; unused.
     */
    Builder.prototype._transform = function(comparator) {
      var transform;
      // Per-candidate mapping: custom transform, or "term only" when distances
      // are excluded, or none.
      transform = typeof this['_custom_transform'] === 'function'
        ? this['_custom_transform']
        : this['_include_distance'] === false
          ? function(candidate) { return candidate[0]; }
          : void 0;
      return (function(_this) {
        return function(matches) {
          var heap, i;
          if (isFinite(_this['_maximum_candidates'])) {
            // Bounded case: `matches` is a MaxHeap (see build); sort it and
            // take its `heap` member (presumably the backing array).
            matches['sort']();
            matches = matches['heap'];
          } else if (_this['_sort_candidates']) {
            // Unbounded, sorted case: drain the heap in pop order.
            heap = matches;
            matches = [];
            while (heap['peek']() !== null) {
              matches.push(heap['pop']());
            }
          }
          if (typeof transform === 'function') {
            i = -1;
            while ((++i) < matches.length) {
              matches[i] = transform(matches[i]);
            }
          }
          return matches;
        };
      })(this);
    };

    /** Returns the start state: a single all-zero position. */
    Builder.prototype._initial_state = function() {
      if (this['_algorithm'] === 'standard') {
        return [[0, 0]];
      } else {
        return [[0, 0, 0]];
      }
    };

    /**
     * Returns a function that sorts a state in place by (i, e), and
     * additionally by the flag for the three-component algorithms.
     */
    Builder.prototype._sort_for_transition = function() {
      var comparator, _ref;
      comparator = function(a, b) {
        return a[0] - b[0] || a[1] - b[1];
      };
      if ((_ref = this['_algorithm']) === 'transposition' || _ref === 'merge_and_split') {
        comparator = (function(comparator) {
          return function(a, b) {
            return comparator(a, b) || a[2] - b[2];
          };
        })(comparator);
      }
      return function(state) {
        return state.sort(comparator);
      };
    };

    /**
     * Returns the smallest j in [0, k) for which vector[i + j] is truthy, or
     * -1 when there is none.
     */
    Builder.prototype._index_of = function(vector, k, i) {
      var j;
      j = 0;
      while (j < k) {
        if (vector[i + j]) {
          return j;
        }
        j += 1;
      }
      return -1;
    };

    /**
     * Returns, for the configured algorithm, a factory n -> transition where
     * transition(position, vector, offset) yields the array of successor
     * positions or null when there are none. `n` bounds the error component;
     * `vector` is a characteristic vector (see _characteristic_vector) and
     * `offset` is subtracted from the position index to address it.
     * Returns undefined for an unrecognised algorithm.
     */
    Builder.prototype._transition_for_position = function() {
      switch (this['_algorithm']) {
        case 'standard':
          return (function(_this) {
            return function(n) {
              return function(_arg, vector, offset) {
                var a, b, e, h, i, j, k, w;
                i = _arg[0], e = _arg[1];
                h = i - offset;
                w = vector.length;
                if (e < n) {
                  if (h <= w - 2) {
                    a = n - e + 1;
                    b = w - h;
                    k = a < b ? a : b;
                    j = _this._index_of(vector, k, h);
                    if (j === 0) {
                      return [[i + 1, e]];
                    } else if (j > 0) {
                      return [[i, e + 1], [i + 1, e + 1], [i + j + 1, e + j]];
                    } else {
                      return [[i, e + 1], [i + 1, e + 1]];
                    }
                  } else if (h === w - 1) {
                    if (vector[h]) {
                      return [[i + 1, e]];
                    } else {
                      return [[i, e + 1], [i + 1, e + 1]];
                    }
                  } else {
                    return [[i, e + 1]];
                  }
                } else if (e === n) {
                  if (h <= w - 1) {
                    if (vector[h]) {
                      return [[i + 1, n]];
                    } else {
                      return null;
                    }
                  } else {
                    return null;
                  }
                } else {
                  return null;
                }
              };
            };
          })(this);
        case 'transposition':
          return (function(_this) {
            return function(n) {
              return function(_arg, vector, offset) {
                var a, b, e, h, i, j, k, t, w;
                i = _arg[0], e = _arg[1], t = _arg[2];
                h = i - offset;
                w = vector.length;
                if ((e === 0 && 0 < n)) {
                  if (h <= w - 2) {
                    a = n - e + 1;
                    b = w - h;
                    k = a < b ? a : b;
                    j = _this._index_of(vector, k, h);
                    if (j === 0) {
                      return [[i + 1, 0, 0]];
                    } else if (j === 1) {
                      return [[i, 1, 0], [i, 1, 1], [i + 1, 1, 0], [i + 2, 1, 0]];
                    } else if (j > 1) {
                      return [[i, 1, 0], [i + 1, 1, 0], [i + j + 1, j, 0]];
                    } else {
                      return [[i, 1, 0], [i + 1, 1, 0]];
                    }
                  } else if (h === w - 1) {
                    if (vector[h]) {
                      return [[i + 1, 0, 0]];
                    } else {
                      return [[i, 1, 0], [i + 1, 1, 0]];
                    }
                  } else {
                    return [[i, 1, 0]];
                  }
                } else if ((1 <= e && e < n)) {
                  if (h <= w - 2) {
                    if (t === 0) {
                      a = n - e + 1;
                      b = w - h;
                      k = a < b ? a : b;
                      j = _this._index_of(vector, k, h);
                      if (j === 0) {
                        return [[i + 1, e, 0]];
                      } else if (j === 1) {
                        return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0], [i + 2, e + 1, 0]];
                      } else if (j > 1) {
                        return [[i, e + 1, 0], [i + 1, e + 1, 0], [i + j + 1, e + j, 0]];
                      } else {
                        return [[i, e + 1, 0], [i + 1, e + 1, 0]];
                      }
                    } else {
                      if (vector[h]) {
                        return [[i + 2, e, 0]];
                      } else {
                        return null;
                      }
                    }
                  } else if (h === w - 1) {
                    if (vector[h]) {
                      return [[i + 1, e, 0]];
                    } else {
                      return [[i, e + 1, 0], [i + 1, e + 1, 0]];
                    }
                  } else {
                    return [[i, e + 1, 0]];
                  }
                } else {
                  if (h <= w - 1 && t === 0) {
                    if (vector[h]) {
                      return [[i + 1, n, 0]];
                    } else {
                      return null;
                    }
                  } else if (h <= w - 2 && t === 1) {
                    if (vector[h]) {
                      return [[i + 2, n, 0]];
                    } else {
                      return null;
                    }
                  } else {
                    return null;
                  }
                }
              };
            };
          })(this);
        case 'merge_and_split':
          return (function(_this) {
            return function(n) {
              return function(_arg, vector, offset) {
                var e, h, i, s, w;
                i = _arg[0], e = _arg[1], s = _arg[2];
                h = i - offset;
                w = vector.length;
                if ((e === 0 && 0 < n)) {
                  if (h <= w - 2) {
                    if (vector[h]) {
                      return [[i + 1, e, 0]];
                    } else {
                      return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0], [i + 2, e + 1, 0]];
                    }
                  } else if (h === w - 1) {
                    if (vector[h]) {
                      return [[i + 1, e, 0]];
                    } else {
                      return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0]];
                    }
                  } else {
                    return [[i, e + 1, 0]];
                  }
                } else if (e < n) {
                  if (h <= w - 2) {
                    if (s === 0) {
                      if (vector[h]) {
                        return [[i + 1, e, 0]];
                      } else {
                        return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0], [i + 2, e + 1, 0]];
                      }
                    } else {
                      return [[i + 1, e, 0]];
                    }
                  } else if (h === w - 1) {
                    if (s === 0) {
                      if (vector[h]) {
                        return [[i + 1, e, 0]];
                      } else {
                        return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0]];
                      }
                    } else {
                      return [[i + 1, e, 0]];
                    }
                  } else {
                    return [[i, e + 1, 0]];
                  }
                } else {
                  if (h <= w - 1) {
                    if (s === 0) {
                      if (vector[h]) {
                        return [[i + 1, n, 0]];
                      } else {
                        return null;
                      }
                    } else {
                      return [[i + 1, e, 0]];
                    }
                  } else {
                    return null;
                  }
                }
              };
            };
          })(this);
      }
    };

    /**
     * Binary search over state[l..] (assumed ordered by error component):
     * returns the first index whose error component is greater than `e`.
     */
    Builder.prototype._bisect_error_right = function(state, e, l) {
      var i, u;
      u = state.length;
      while (l < u) {
        i = (l + u) >> 1;
        if (e < state[i][1]) {
          u = i;
        } else {
          l = i + 1;
        }
      }
      return l;
    };

    /**
     * Returns a function that removes, in place, every position of a state
     * that is subsumed by an earlier position with a strictly smaller error
     * component. Returns undefined for an unrecognised algorithm.
     */
    Builder.prototype._unsubsume = function() {
      var bisect_error_right, subsumes;
      subsumes = this._subsumes();
      bisect_error_right = this._bisect_error_right;
      switch (this['_algorithm']) {
        case 'standard':
          return function(state) {
            var e, f, i, j, m, n, x, y;
            m = 0;
            while (x = state[m]) {
              i = x[0], e = x[1];
              // Only positions with a larger error can be subsumed by x.
              n = bisect_error_right(state, e, m);
              while (y = state[n]) {
                j = y[0], f = y[1];
                if (subsumes(i, e, j, f)) {
                  state.splice(n, 1);
                } else {
                  n += 1;
                }
              }
              m += 1;
            }
          };
        case 'transposition':
          return function(state) {
            var e, f, i, j, m, n, s, t, x, y;
            m = 0;
            while (x = state[m]) {
              i = x[0], e = x[1], s = x[2];
              n = bisect_error_right(state, e, m);
              while (y = state[n]) {
                j = y[0], f = y[1], t = y[2];
                // NOTE(review): the 7th argument is this loop's index `n`,
                // which the transposition predicate compares against the
                // error component `f`. That looks like a name collision with
                // a different quantity; fixing it needs a value that is not
                // available here, so behaviour is left unchanged -- confirm
                // against the reference algorithm.
                if (subsumes(i, e, s, j, f, t, n)) {
                  state.splice(n, 1);
                } else {
                  n += 1;
                }
              }
              m += 1;
            }
          };
        case 'merge_and_split':
          return function(state) {
            var e, f, i, j, m, n, s, t, x, y;
            m = 0;
            while (x = state[m]) {
              i = x[0], e = x[1], s = x[2];
              n = bisect_error_right(state, e, m);
              while (y = state[n]) {
                j = y[0], f = y[1], t = y[2];
                // The merge_and_split predicate ignores the 7th argument.
                if (subsumes(i, e, s, j, f, t, n)) {
                  state.splice(n, 1);
                } else {
                  n += 1;
                }
              }
              m += 1;
            }
          };
      }
    };

    /**
     * Returns the subsumption predicate for the configured algorithm: whether
     * position (i, e[, s]) subsumes position (j, f[, t]). The base rule is
     * |i - j| <= f - e. Returns undefined for an unrecognised algorithm.
     */
    Builder.prototype._subsumes = function() {
      switch (this['_algorithm']) {
        case 'standard':
          return function(i, e, j, f) {
            return ((i < j) && (j - i) || (i - j)) <= (f - e);
          };
        case 'transposition':
          return function(i, e, s, j, f, t, n) {
            if (s === 1) {
              if (t === 1) {
                return i === j;
              } else {
                return (f === n) && (i === j);
              }
            } else {
              if (t === 1) {
                // |j - (i - 1)| <= f - e
                return (j < i ? i - j - 1 : j - i + 1) <= (f - e);
              } else {
                return ((i < j) && (j - i) || (i - j)) <= (f - e);
              }
            }
          };
        case 'merge_and_split':
          return function(i, e, s, j, f, t) {
            if (s === 1 && t === 0) {
              return false;
            } else {
              return ((i < j) && (j - i) || (i - j)) <= (f - e);
            }
          };
      }
    };

    /**
     * Returns a function (state, position) giving the leftmost insertion
     * index of `position` in `state`, where `state` is ordered by error, then
     * index, then (for the three-component algorithms) flag.
     *
     * The original condition tested only the truthiness of `_algorithm`,
     * which is always a non-empty string, so the three-component comparison
     * was unreachable. Positions differing only in their flag were therefore
     * ordered arbitrarily and _merge_for_subsumption could insert the same
     * position more than once. The check now selects the two-component
     * comparison for 'standard' only.
     */
    Builder.prototype._bisect_left = function() {
      if (this['_algorithm'] === 'standard') {
        return function(state, position) {
          var e, i, k, l, p, u;
          i = position[0], e = position[1];
          l = 0;
          u = state.length;
          while (l < u) {
            k = (l + u) >> 1;
            p = state[k];
            if ((e - p[1] || i - p[0]) > 0) {
              l = k + 1;
            } else {
              u = k;
            }
          }
          return l;
        };
      } else {
        return function(state, position) {
          var e, i, k, l, p, u, x;
          i = position[0], e = position[1], x = position[2];
          l = 0;
          u = state.length;
          while (l < u) {
            k = (l + u) >> 1;
            p = state[k];
            if ((e - p[1] || i - p[0] || x - p[2]) > 0) {
              l = k + 1;
            } else {
              u = k;
            }
          }
          return l;
        };
      }
    };

    /**
     * Returns a function (state_prime, next_state) that inserts each position
     * of `next_state` into the ordered `state_prime` in place, skipping a
     * position that is already present at its insertion point.
     */
    Builder.prototype._merge_for_subsumption = function() {
      var bisect_left;
      bisect_left = this._bisect_left();
      if (this['_algorithm'] === 'standard') {
        return function(state_prime, next_state) {
          var curr, i, position, _i, _len;
          for (_i = 0, _len = next_state.length; _i < _len; _i++) {
            position = next_state[_i];
            i = bisect_left(state_prime, position);
            if (curr = state_prime[i]) {
              if (curr[0] !== position[0] || curr[1] !== position[1]) {
                state_prime.splice(i, 0, position);
              }
            } else {
              state_prime.push(position);
            }
          }
        };
      } else {
        return function(state_prime, next_state) {
          var curr, i, position, _i, _len;
          for (_i = 0, _len = next_state.length; _i < _len; _i++) {
            position = next_state[_i];
            i = bisect_left(state_prime, position);
            if (curr = state_prime[i]) {
              if (curr[0] !== position[0] || curr[1] !== position[1] || curr[2] !== position[2]) {
                state_prime.splice(i, 0, position);
              }
            } else {
              state_prime.push(position);
            }
          }
        };
      }
    };

    /**
     * Returns a factory n -> transition where transition(state, vector)
     * computes the successor state: every position is advanced, the results
     * are merged, subsumed positions are dropped and the remainder is sorted
     * for the next step. Yields null when no position survives.
     */
    Builder.prototype._transition_for_state = function() {
      var merge_for_subsumption, sort_for_transition, transition_for_position, unsubsume;
      merge_for_subsumption = this._merge_for_subsumption();
      unsubsume = this._unsubsume();
      transition_for_position = this._transition_for_position();
      sort_for_transition = this._sort_for_transition();
      return function(n) {
        var transition;
        transition = transition_for_position(n);
        return (function(_this) {
          return function(state, vector) {
            var next_state, offset, position, state_prime, _i, _len;
            // The first position's index is the base for addressing `vector`.
            offset = state[0][0];
            state_prime = [];
            for (_i = 0, _len = state.length; _i < _len; _i++) {
              position = state[_i];
              next_state = transition(position, vector, offset);
              if (!next_state) {
                continue;
              }
              merge_for_subsumption(state_prime, next_state);
            }
            unsubsume(state_prime);
            if (state_prime.length > 0) {
              sort_for_transition(state_prime);
              return state_prime;
            } else {
              return null;
            }
          };
        })(this);
      };
    };

    /**
     * Returns a function (x, term, k, i) producing an array of k booleans
     * whose j-th entry is `x === term[i + j]`.
     */
    Builder.prototype._characteristic_vector = function() {
      return function(x, term, k, i) {
        var j, vector;
        vector = [];
        j = 0;
        while (j < k) {
          vector.push(x === term[i + j]);
          j += 1;
        }
        return vector;
      };
    };

    /**
     * Returns the function (candidates, candidate) used to record a match.
     * With a finite maximum, `candidates` is a max-heap ordered by `compare`
     * (see build), so its top is the worst retained candidate and is replaced
     * only by a strictly better one.
     *
     * @param {Function} compare Candidate comparator.
     */
    Builder.prototype._push = function(compare) {
      var maximum_candidates;
      maximum_candidates = this['_maximum_candidates'];
      if (isFinite(maximum_candidates)) {
        return function(candidates, candidate) {
          var worst;
          // `>=` rather than `===` so a non-integer limit still caps the heap.
          if (candidates.length >= maximum_candidates) {
            // peek() yields null on an empty heap (only possible when the
            // limit is 0); in that case nothing may be stored and the
            // comparator must not be called with null.
            worst = candidates['peek']();
            if (worst != null && compare(candidate, worst) < 0) {
              candidates['pop']();
              candidates.push(candidate);
            }
          } else {
            candidates.push(candidate);
          }
          return candidates;
        };
      } else {
        return function(candidates, candidate) {
          candidates.push(candidate);
          return candidates;
        };
      }
    };

    /**
     * Assembles a Transducer from the current configuration.
     *
     * @returns {Transducer}
     */
    Builder.prototype['build'] = function() {
      var comparator;
      comparator = this._comparator();
      return new Transducer({
        'minimum_distance': this._minimum_distance(),
        // Factory for the per-query matches container: a bounded max-heap, a
        // heap with the comparator negated (so pops come out ascending), or a
        // plain array when neither bounding nor sorting is requested.
        'build_matches': (function(_this) {
          return function() {
            if (isFinite(_this['_maximum_candidates'])) {
              return function() {
                return new MaxHeap(comparator);
              };
            } else if (_this['_sort_candidates']) {
              return function() {
                return new MaxHeap(function(a, b) {
                  return -comparator(a, b);
                });
              };
            } else {
              return function() {
                return [];
              };
            }
          };
        })(this)(),
        'transition_for_state': this._transition_for_state(),
        'characteristic_vector': this._characteristic_vector(),
        'edges': function(dawg_node) {
          return dawg_node['edges'];
        },
        'is_final': function(dawg_node) {
          return dawg_node['is_final'];
        },
        'root': (function(dawg) {
          return function() {
            return dawg['root'];
          };
        })(this['_dictionary']),
        // The same initial-state array is returned on every call.
        'initial_state': (function(initial_state) {
          return (function(_this) {
            return function() {
              return initial_state;
            };
          })(this);
        })(this._initial_state()),
        'push': this._push(comparator),
        'default_edit_distance': (function(_this) {
          return function() {
            return _this['default_edit_distance']();
          };
        })(this),
        'transform': this._transform(comparator)
      });
    };

    return Builder;

  })();

  // Alias: builder.transducer() is the same as builder.build().
  Builder.prototype['transducer'] = Builder.prototype['build'];

  // Install the defaults on the prototype.
  for (property in fields) {
    if (!__hasProp.call(fields, property)) continue;
    value = fields[property];
    Builder.prototype[property] = value;
  }

  noop = function() {};

  identity = function(x) {
    return x;
  };

  /**
   * Defines one accessor per name in `properties` on Builder.prototype.
   * Called without a value the accessor returns the current setting; called
   * with a value it validates and translates it and returns a NEW Builder
   * carrying the change (the receiver is left untouched).
   *
   * @param {string|string[]} properties Property name(s).
   * @param {{validate: Function=, translate: Function=}} params
   *   validate(value, opts, property) should throw on bad input;
   *   translate(value, opts, property) returns the value to store.
   *   `opts` is the array of extra arguments passed to the accessor.
   * @returns {boolean} true
   * @throws {Error} On malformed `properties`, `validate` or `translate`.
   */
  def_property = def_properties = function(properties, params, property, i) {
    var translate, validate, _fn, _i, _len, _ref;
    _ref = [params['validate'], params['translate']], validate = _ref[0], translate = _ref[1];
    if (typeof properties === 'string') {
      properties = [properties];
    }
    if (!(properties instanceof Array)) {
      throw new Error('Expected "properties" to be of type Array');
    }
    if (validate !== undefined && typeof validate !== 'function') {
      throw new Error('Expected "validate" to be of type Function');
    }
    if (translate !== undefined && typeof translate !== 'function') {
      throw new Error('Expected "translate" to be of type Function');
    }
    validate || (validate = noop);
    translate || (translate = identity);
    _fn = function(property) {
      var field;
      field = '_' + property;
      return Builder.prototype[property] = function() {
        var attributes, opts, value;
        value = arguments[0], opts = 2 <= arguments.length ? __slice.call(arguments, 1) : [];
        if (value === undefined) {
          return this[field];
        } else {
          validate(value, opts, property);
          value = translate(value, opts, property);
          attributes = {};
          attributes[property] = value;
          return new Builder(this, attributes);
        }
      };
    };
    for (i = _i = 0, _len = properties.length; _i < _len; i = ++_i) {
      property = properties[i];
      if (typeof property !== 'string') {
        throw new Error("Expected property at index " + i + " of properties to be of type String");
      }
      _fn(property);
    }
    return true;
  };

  // dictionary(value, sorted): accepts a Dawg or an Array of terms. An Array
  // is turned into a Dawg; unless `sorted === true` it is first sorted IN
  // PLACE, i.e. the caller's array is reordered.
  def_property('dictionary', {
    'validate': function(dictionary) {
      if (!(dictionary instanceof Array || dictionary instanceof Dawg)) {
        throw new Error('dictionary must be either an Array or Dawg');
      }
    },
    'translate': function(dictionary, _arg) {
      var sorted;
      sorted = _arg[0];
      if (dictionary instanceof Array) {
        if (sorted !== true) {
          dictionary.sort();
        }
        dictionary = new Dawg(dictionary);
      }
      return dictionary;
    }
  });

  def_property('algorithm', {
    'validate': function(algorithm) {
      if (algorithm !== 'standard' && algorithm !== 'transposition' && algorithm !== 'merge_and_split') {
        throw new Error('algorithm must be standard, transposition, or merge_and_split');
      }
    }
  });

  def_properties(['sort_candidates', 'case_insensitive_sort', 'include_distance'], {
    'validate': function(value, _, property) {
      if (typeof value !== 'boolean') {
        throw new Error("Expected type of \"" + property + "\" to be boolean");
      }
    }
  });

  def_properties(['maximum_candidates', 'default_edit_distance'], {
    'validate': function(value, _, property) {
      // NaN fails `0 <= value`, so it is rejected as well.
      if (!(typeof value === 'number' && 0 <= value)) {
        throw new Error("Expected \"" + property + "\" to be a non-negative number");
      }
    }
  });

  def_properties(['custom_comparator', 'custom_transform'], {
    'validate': function(value, _, property) {
      if (typeof value !== 'function') {
        throw new Error("Expected \"" + property + "\" to be a function");
      }
    }
  });

  global['levenshtein']['Builder'] = Builder;

}).call(this);