@a-s8h/liblevenshtein
Version:
Various utilities regarding Levenshtein transducers.
799 lines (763 loc) • 26 kB
JavaScript
// Generated by CoffeeScript 1.7.1
(function() {
var Builder, Dawg, MaxHeap, Transducer, def_properties, def_property, fields, global, identity, noop, property, value,
__bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; },
__hasProp = {}.hasOwnProperty,
__slice = [].slice;
global = typeof exports === 'object' ? exports : typeof window === 'object' ? window : this;
global['levenshtein'] || (global['levenshtein'] = {});
if (typeof require === 'function') {
MaxHeap = require('../collection/max-heap').levenshtein.MaxHeap;
Transducer = require('./transducer').levenshtein.Transducer;
Dawg = require('../collection/dawg').levenshtein.Dawg;
} else {
MaxHeap = global['levenshtein']['MaxHeap'];
Transducer = global['levenshtein']['Transducer'];
Dawg = global['levenshtein']['Dawg'];
}
// Default value of every builder option, keyed by the underscore-prefixed
// field name. The Builder constructor copies exactly these keys from a source
// builder, and the same entries are installed on Builder.prototype after the
// class body so that a fresh builder inherits them.
fields = {
// Dictionary to search; defaults to a Dawg built from an empty word list.
'_dictionary': new Dawg([]),
// One of 'standard', 'transposition' or 'merge_and_split' (see the
// `algorithm` property validator).
'_algorithm': 'standard',
// When truthy, candidates are ordered by the comparator from _comparator().
'_sort_candidates': true,
// When falsy, _comparator() adds a case-sensitive tie-break.
'_case_insensitive_sort': true,
// When exactly false, _transform() reduces each candidate to candidate[0].
'_include_distance': true,
// A finite value makes _push() cap the number of retained candidates.
'_maximum_candidates': Infinity,
// Optional user-supplied replacements for the comparator / result transform.
'_custom_comparator': null,
'_custom_transform': null,
// Read back through the `default_edit_distance` accessor and handed to the
// Transducer by build().
'_default_edit_distance': Infinity
};
Builder = (function() {
/**
 * Creates a builder. With no `source` (or a `source` that is not a Builder)
 * the instance relies entirely on the prototype defaults and `attributes` is
 * ignored. With a Builder `source`, every option listed in `fields` is copied
 * from it and then each own key of `attributes` is stored under the
 * underscore-prefixed field name, overriding the copied value.
 *
 * @param {Builder=} source      builder whose options are copied
 * @param {Object=}  attributes  option overrides keyed WITHOUT the leading '_'
 */
function Builder(source, attributes) {
  var field_name, key;

  // Keep `_unsubsume` usable after it has been detached from the instance.
  this._unsubsume = __bind(this._unsubsume, this);

  if (!(source instanceof Builder)) {
    return;
  }

  for (field_name in fields) {
    if (__hasProp.call(fields, field_name)) {
      this[field_name] = source[field_name];
    }
  }

  for (key in attributes) {
    if (__hasProp.call(attributes, key)) {
      this['_' + key] = attributes[key];
    }
  }
}
/**
 * Returns a function `(state, w)` that yields the smallest value of
 * `w - i + e` over the positions `[i, e, ...]` of `state`, or Infinity when
 * no position qualifies. For every algorithm other than 'standard', positions
 * whose third component equals 1 are not considered.
 */
Builder.prototype._minimum_distance = function() {
  if (this['_algorithm'] === 'standard') {
    return function(state, w) {
      var best = Infinity;
      var k, position, candidate;
      for (k = 0; k < state.length; k += 1) {
        position = state[k];
        candidate = w - position[0] + position[1];
        if (candidate < best) {
          best = candidate;
        }
      }
      return best;
    };
  }

  return function(state, w) {
    var best = Infinity;
    var k, position, candidate;
    for (k = 0; k < state.length; k += 1) {
      position = state[k];
      if (position[2] === 1) {
        // Flagged positions never contribute a distance.
        continue;
      }
      candidate = w - position[0] + position[1];
      if (candidate < best) {
        best = candidate;
      }
    }
    return best;
  };
};
/**
 * Chooses the ordering applied to candidates `[term, distance]`.
 *
 * - A function stored in `_custom_comparator` is returned as is.
 * - Otherwise, when `_sort_candidates` is truthy, candidates are ordered by
 *   distance, then by lower-cased term, and, only when
 *   `_case_insensitive_sort` is falsy, finally by the raw term.
 * - Otherwise every pair compares as equal (the function returns 0).
 */
Builder.prototype._comparator = function() {
  var custom = this['_custom_comparator'];
  var break_ties_by_case;

  if (typeof custom === 'function') {
    return custom;
  }
  if (!this['_sort_candidates']) {
    return function() {
      return 0;
    };
  }

  break_ties_by_case = !this['_case_insensitive_sort'];
  return function(a, b) {
    var order = a[1] - b[1];
    if (!order) {
      order = a[0].toLowerCase().localeCompare(b[0].toLowerCase());
    }
    if (!order && break_ties_by_case) {
      order = a[0].localeCompare(b[0]);
    }
    return order;
  };
};
/**
 * Returns the function that turns the collected matches into the final
 * result array.
 *
 * The per-candidate transform is `_custom_transform` when that is a function,
 * a "term only" projection when `_include_distance` is exactly false, and
 * absent otherwise.
 *
 * The returned function reads the builder options when it is called:
 * - finite `_maximum_candidates`: calls `matches.sort()` and continues with
 *   `matches.heap`;
 * - else truthy `_sort_candidates`: drains `matches` with peek()/pop() until
 *   peek() returns null;
 * - else uses `matches` unchanged.
 * The transform, if any, is then applied to the array in place.
 *
 * @param {Function} comparator  accepted for symmetry with _push; not used
 */
Builder.prototype._transform = function(comparator) {
  var builder = this;
  var transform;

  if (typeof this['_custom_transform'] === 'function') {
    transform = this['_custom_transform'];
  } else if (this['_include_distance'] === false) {
    transform = function(candidate) {
      return candidate[0];
    };
  }

  return function(matches) {
    var drained, k;

    if (isFinite(builder['_maximum_candidates'])) {
      matches['sort']();
      matches = matches['heap'];
    } else if (builder['_sort_candidates']) {
      drained = [];
      while (matches['peek']() !== null) {
        drained.push(matches['pop']());
      }
      matches = drained;
    }

    if (typeof transform === 'function') {
      for (k = 0; k < matches.length; k += 1) {
        matches[k] = transform(matches[k]);
      }
    }
    return matches;
  };
};
/**
 * Returns a new start state holding a single all-zero position: `[0, 0]` for
 * the 'standard' algorithm and `[0, 0, 0]` for every other algorithm.
 */
Builder.prototype._initial_state = function() {
  var start = [0, 0];
  if (this['_algorithm'] !== 'standard') {
    // Non-standard positions carry a third component.
    start.push(0);
  }
  return [start];
};
/**
 * Returns a function that sorts a state in place (and returns it), ordering
 * positions by their first component, then their second, and, for the
 * 'transposition' and 'merge_and_split' algorithms, then their third.
 */
Builder.prototype._sort_for_transition = function() {
  var algorithm = this['_algorithm'];
  var compares_flag = algorithm === 'transposition' || algorithm === 'merge_and_split';

  var by_position = function(a, b) {
    var order = a[0] - b[0] || a[1] - b[1];
    if (compares_flag) {
      order = order || a[2] - b[2];
    }
    return order;
  };

  return function(state) {
    return state.sort(by_position);
  };
};
/**
 * Finds the first truthy entry of `vector` inside the window of `k` entries
 * that starts at index `i`.
 *
 * @return {number} offset of that entry relative to `i`, or -1 when the
 *     window holds no truthy entry (including when `k` is not positive)
 */
Builder.prototype._index_of = function(vector, k, i) {
  var offset;
  for (offset = 0; offset < k; offset += 1) {
    if (vector[i + offset]) {
      return offset;
    }
  }
  return -1;
};
// Returns, for the configured algorithm, a factory `function(n)` that yields
// the position-transition function `function(position, vector, offset)`.
//
// The transition function returns an array of successor positions, or null
// when the position has no successor. Positions are `[i, e]` for 'standard'
// and `[i, e, flag]` for 'transposition' / 'merge_and_split'.
//
// Inside every variant:
//   h = i - offset   index of the position inside `vector`
//   w = vector.length
// and the branches distinguish `e < n` from the remaining case, and whether
// `h` is at least two entries before the end of `vector` (h <= w - 2), on
// the last entry (h === w - 1), or beyond it.
//
// NOTE(review): presumably `n` is the maximum edit distance, `i` an index
// into the query term, `e` the number of errors consumed so far, and `vector`
// the output of _characteristic_vector -- confirm against Transducer.
//
// The switch has no default branch, so an unrecognised algorithm makes this
// method return undefined.
Builder.prototype._transition_for_position = function() {
switch (this['_algorithm']) {
case 'standard':
return (function(_this) {
return function(n) {
return function(_arg, vector, offset) {
var a, b, e, h, i, j, k, w;
i = _arg[0], e = _arg[1];
h = i - offset;
w = vector.length;
if (e < n) {
if (h <= w - 2) {
// Search window: at most n - e + 1 entries, clipped to what is left of
// the vector from h onwards.
a = n - e + 1;
b = w - h;
k = a < b ? a : b;
// j is the offset of the first truthy vector entry in the window, or -1.
j = _this._index_of(vector, k, h);
if (j === 0) {
return [[i + 1, e]];
} else if (j > 0) {
return [[i, e + 1], [i + 1, e + 1], [i + j + 1, e + j]];
} else {
return [[i, e + 1], [i + 1, e + 1]];
}
} else if (h === w - 1) {
if (vector[h]) {
return [[i + 1, e]];
} else {
return [[i, e + 1], [i + 1, e + 1]];
}
} else {
// h lies past the end of the vector.
return [[i, e + 1]];
}
} else if (e === n) {
// No further errors may be added: only a truthy entry at h has a successor.
if (h <= w - 1) {
if (vector[h]) {
return [[i + 1, n]];
} else {
return null;
}
} else {
return null;
}
} else {
return null;
}
};
};
})(this);
case 'transposition':
return (function(_this) {
return function(n) {
return function(_arg, vector, offset) {
var a, b, e, h, i, j, k, t, w;
// t is the third position component (0 or 1 in every position produced here).
i = _arg[0], e = _arg[1], t = _arg[2];
h = i - offset;
w = vector.length;
if ((e === 0 && 0 < n)) {
if (h <= w - 2) {
a = n - e + 1;
b = w - h;
k = a < b ? a : b;
j = _this._index_of(vector, k, h);
if (j === 0) {
return [[i + 1, 0, 0]];
} else if (j === 1) {
// Only this case emits a position with the third component set to 1.
return [[i, 1, 0], [i, 1, 1], [i + 1, 1, 0], [i + 2, 1, 0]];
} else if (j > 1) {
return [[i, 1, 0], [i + 1, 1, 0], [i + j + 1, j, 0]];
} else {
return [[i, 1, 0], [i + 1, 1, 0]];
}
} else if (h === w - 1) {
if (vector[h]) {
return [[i + 1, 0, 0]];
} else {
return [[i, 1, 0], [i + 1, 1, 0]];
}
} else {
return [[i, 1, 0]];
}
} else if ((1 <= e && e < n)) {
if (h <= w - 2) {
if (t === 0) {
a = n - e + 1;
b = w - h;
k = a < b ? a : b;
j = _this._index_of(vector, k, h);
if (j === 0) {
return [[i + 1, e, 0]];
} else if (j === 1) {
return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0], [i + 2, e + 1, 0]];
} else if (j > 1) {
return [[i, e + 1, 0], [i + 1, e + 1, 0], [i + j + 1, e + j, 0]];
} else {
return [[i, e + 1, 0], [i + 1, e + 1, 0]];
}
} else {
// Flagged position: a truthy entry at h advances i by two with e unchanged;
// otherwise the position has no successor.
if (vector[h]) {
return [[i + 2, e, 0]];
} else {
return null;
}
}
} else if (h === w - 1) {
if (vector[h]) {
return [[i + 1, e, 0]];
} else {
return [[i, e + 1, 0], [i + 1, e + 1, 0]];
}
} else {
return [[i, e + 1, 0]];
}
} else {
// Remaining case (neither e === 0 with 0 < n, nor 1 <= e < n): successors are
// emitted with error count n.
if (h <= w - 1 && t === 0) {
if (vector[h]) {
return [[i + 1, n, 0]];
} else {
return null;
}
} else if (h <= w - 2 && t === 1) {
if (vector[h]) {
return [[i + 2, n, 0]];
} else {
return null;
}
} else {
return null;
}
}
};
};
})(this);
case 'merge_and_split':
return (function(_this) {
return function(n) {
return function(_arg, vector, offset) {
var e, h, i, s, w;
// s is the third position component (0 or 1 in every position produced here).
i = _arg[0], e = _arg[1], s = _arg[2];
h = i - offset;
w = vector.length;
if ((e === 0 && 0 < n)) {
if (h <= w - 2) {
if (vector[h]) {
return [[i + 1, e, 0]];
} else {
return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0], [i + 2, e + 1, 0]];
}
} else if (h === w - 1) {
if (vector[h]) {
return [[i + 1, e, 0]];
} else {
// On the last vector entry the [i + 2, ...] successor is omitted.
return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0]];
}
} else {
return [[i, e + 1, 0]];
}
} else if (e < n) {
if (h <= w - 2) {
if (s === 0) {
if (vector[h]) {
return [[i + 1, e, 0]];
} else {
return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0], [i + 2, e + 1, 0]];
}
} else {
// Flagged position: always advances by one, regardless of the vector.
return [[i + 1, e, 0]];
}
} else if (h === w - 1) {
if (s === 0) {
if (vector[h]) {
return [[i + 1, e, 0]];
} else {
return [[i, e + 1, 0], [i, e + 1, 1], [i + 1, e + 1, 0]];
}
} else {
return [[i + 1, e, 0]];
}
} else {
return [[i, e + 1, 0]];
}
} else {
// Remaining case (e is neither 0 with 0 < n, nor below n).
if (h <= w - 1) {
if (s === 0) {
if (vector[h]) {
return [[i + 1, n, 0]];
} else {
return null;
}
} else {
return [[i + 1, e, 0]];
}
} else {
return null;
}
}
};
};
})(this);
}
};
/**
 * Binary search over `state[l .. state.length)`, comparing only the second
 * component of each position.
 *
 * @param {Array} state  positions; the searched range is expected to be in
 *     non-decreasing order of the second component
 * @param {number} e     value to search for
 * @param {number} l     first index of the searched range
 * @return {number} index of the first position in the range whose second
 *     component is greater than `e`, or `state.length` when there is none
 */
Builder.prototype._bisect_error_right = function(state, e, l) {
  var low = l;
  var high = state.length;
  var middle;
  while (low < high) {
    middle = (low + high) >> 1;
    if (state[middle][1] > e) {
      high = middle;
    } else {
      low = middle + 1;
    }
  }
  return low;
};
/**
 * Returns a function `(state)` that removes, in place, every position that is
 * subsumed by a position appearing earlier in `state`.
 *
 * For each position x (taken in order, stopping at the first falsy entry) the
 * scan starts at the first later position whose second component exceeds
 * x's, as located by _bisect_error_right, and splices out every position y
 * for which the predicate from _subsumes() holds.
 *
 * 'standard' calls the predicate with (i, e, j, f); 'transposition' and
 * 'merge_and_split' call it with (i, e, s, j, f, t, n).
 *
 * NOTE(review): the seventh argument `n` is the current scan index into
 * `state`, not a distance bound -- confirm that this is what the
 * 'transposition' predicate's `f === n` test expects.
 *
 * No function is returned (the result is undefined) for any other algorithm.
 */
Builder.prototype._unsubsume = function() {
  var subsumes = this._subsumes();
  var bisect_error_right = this._bisect_error_right;

  // Shared scan; `is_subsumed(x, y, index_of_y)` decides whether y is dropped.
  var prune = function(state, is_subsumed) {
    var outer, inner, keeper, candidate;
    for (outer = 0; (keeper = state[outer]); outer += 1) {
      inner = bisect_error_right(state, keeper[1], outer);
      while ((candidate = state[inner])) {
        if (is_subsumed(keeper, candidate, inner)) {
          // The next candidate slides into `inner`, so do not advance.
          state.splice(inner, 1);
        } else {
          inner += 1;
        }
      }
    }
  };

  var by_pair = function(x, y) {
    return subsumes(x[0], x[1], y[0], y[1]);
  };
  var by_triple = function(x, y, n) {
    return subsumes(x[0], x[1], x[2], y[0], y[1], y[2], n);
  };

  switch (this['_algorithm']) {
    case 'standard':
      return function(state) {
        prune(state, by_pair);
      };
    case 'transposition':
    case 'merge_and_split':
      return function(state) {
        prune(state, by_triple);
      };
  }
};
/**
 * Returns the subsumption predicate for the configured algorithm. Arguments
 * are the components of two positions: (i, e[, s]) for the first and
 * (j, f[, t]) for the second.
 *
 * - 'standard' (i, e, j, f): |i - j| <= f - e.
 * - 'transposition' (i, e, s, j, f, t, n):
 *     s = 1, t = 1  ->  i === j
 *     s = 1, t != 1 ->  f === n and i === j
 *     s != 1, t = 1 ->  (j < i ? i - j - 1 : j - i + 1) <= f - e
 *     otherwise     ->  |i - j| <= f - e
 * - 'merge_and_split' (i, e, s, j, f, t): false when s = 1 and t = 0,
 *   otherwise |i - j| <= f - e.
 *
 * Returns undefined for any other algorithm.
 */
Builder.prototype._subsumes = function() {
  // |i - j| <= f - e, the test shared by all three algorithms.
  var within_reach = function(i, e, j, f) {
    var gap = i < j ? j - i : i - j;
    return gap <= (f - e);
  };

  switch (this['_algorithm']) {
    case 'standard':
      return within_reach;

    case 'transposition':
      return function(i, e, s, j, f, t, n) {
        var shifted_gap;
        if (s === 1) {
          return t === 1 ? i === j : (f === n) && (i === j);
        }
        if (t === 1) {
          shifted_gap = j < i ? i - j - 1 : j - i + 1;
          return shifted_gap <= (f - e);
        }
        return within_reach(i, e, j, f);
      };

    case 'merge_and_split':
      return function(i, e, s, j, f, t) {
        if (s === 1 && t === 0) {
          return false;
        }
        return within_reach(i, e, j, f);
      };
  }
};
/**
 * Returns a function `(state, position)` that binary-searches `state` for the
 * leftmost index at which `position` can be inserted while keeping `state`
 * ordered by second component, then first component, and -- for every
 * algorithm other than 'standard' -- then third component.
 *
 * The algorithm test used to be `if (this['_algorithm'])`, which holds for
 * every algorithm name, so the three-component search below was never
 * selected and positions differing only in their third component were
 * treated as equal. It now tests for 'standard' explicitly, like the other
 * algorithm-dependent methods of this class.
 *
 * @return {function(Array, Array): number}
 */
Builder.prototype._bisect_left = function() {
  if (this['_algorithm'] === 'standard') {
    // Positions are [i, e].
    return function(state, position) {
      var e, i, k, l, p, u;
      i = position[0], e = position[1];
      l = 0;
      u = state.length;
      while (l < u) {
        k = (l + u) >> 1;
        p = state[k];
        // The first non-zero difference decides the order.
        if ((e - p[1] || i - p[0]) > 0) {
          l = k + 1;
        } else {
          u = k;
        }
      }
      return l;
    };
  } else {
    // Positions are [i, e, x]; x breaks ties between equal (e, i).
    return function(state, position) {
      var e, i, k, l, p, u, x;
      i = position[0], e = position[1], x = position[2];
      l = 0;
      u = state.length;
      while (l < u) {
        k = (l + u) >> 1;
        p = state[k];
        if ((e - p[1] || i - p[0] || x - p[2]) > 0) {
          l = k + 1;
        } else {
          u = k;
        }
      }
      return l;
    };
  }
};
/**
 * Returns a function `(state_prime, next_state)` that inserts every position
 * of `next_state` into `state_prime`, in place, at the index reported by the
 * search function from _bisect_left().
 *
 * A position is skipped when the entry already at that index has the same
 * components (two for 'standard', three for every other algorithm); it is
 * appended when that index holds no entry.
 */
Builder.prototype._merge_for_subsumption = function() {
  var bisect_left = this._bisect_left();
  var same_position;

  if (this['_algorithm'] === 'standard') {
    same_position = function(p, q) {
      return p[0] === q[0] && p[1] === q[1];
    };
  } else {
    same_position = function(p, q) {
      return p[0] === q[0] && p[1] === q[1] && p[2] === q[2];
    };
  }

  return function(state_prime, next_state) {
    var count = next_state.length;
    var k, position, slot, occupant;
    for (k = 0; k < count; k += 1) {
      position = next_state[k];
      slot = bisect_left(state_prime, position);
      occupant = state_prime[slot];
      if (!occupant) {
        state_prime.push(position);
      } else if (!same_position(occupant, position)) {
        state_prime.splice(slot, 0, position);
      }
    }
  };
};
/**
 * Returns a factory `function(n)` producing the state-transition function
 * `function(state, vector)`.
 *
 * The state transition applies the position transition (built for `n`) to
 * every position of `state`, passing the first component of `state[0]` as the
 * offset, merges all non-empty results, prunes subsumed positions, and sorts
 * what is left. It returns the resulting array, or null when that array is
 * empty.
 */
Builder.prototype._transition_for_state = function() {
  var merge_positions = this._merge_for_subsumption();
  var prune_subsumed = this._unsubsume();
  var position_transition_for = this._transition_for_position();
  var sort_state = this._sort_for_transition();

  return function(n) {
    var step = position_transition_for(n);

    return function(state, vector) {
      var offset = state[0][0];
      var count = state.length;
      var merged = [];
      var k, successors;

      for (k = 0; k < count; k += 1) {
        successors = step(state[k], vector, offset);
        if (successors) {
          merge_positions(merged, successors);
        }
      }

      prune_subsumed(merged);
      if (merged.length === 0) {
        return null;
      }
      sort_state(merged);
      return merged;
    };
  };
};
/**
 * Returns a function `(x, term, k, i)` that builds an array of `k` booleans
 * whose j-th entry tells whether `x` is strictly equal to `term[i + j]`.
 * Indices past the end of `term` compare against undefined. A `k` of zero or
 * less yields an empty array.
 */
Builder.prototype._characteristic_vector = function() {
  return function(x, term, k, i) {
    var flags = [];
    var j;
    for (j = 0; j < k; j += 1) {
      flags[j] = (x === term[i + j]);
    }
    return flags;
  };
};
/**
 * Returns the function `(candidates, candidate)` used to record a match; it
 * always returns `candidates`.
 *
 * With a non-finite `_maximum_candidates` every candidate is pushed. With a
 * finite limit, candidates are pushed until `candidates.length` equals the
 * limit; from then on a new candidate replaces the one returned by
 * `candidates.peek()` only when `compare(candidate, peeked) < 0`.
 *
 * @param {Function} compare  candidate comparator
 */
Builder.prototype._push = function(compare) {
  var limit = this['_maximum_candidates'];

  var append = function(candidates, candidate) {
    candidates.push(candidate);
    return candidates;
  };

  if (!isFinite(limit)) {
    return append;
  }

  return function(candidates, candidate) {
    if (candidates.length !== limit) {
      return append(candidates, candidate);
    }
    if (compare(candidate, candidates['peek']()) < 0) {
      candidates['pop']();
      candidates.push(candidate);
    }
    return candidates;
  };
};
/**
 * Creates a Transducer configured from this builder's current options.
 *
 * The match container is chosen once, here:
 * - finite `_maximum_candidates`: a MaxHeap ordered by the comparator;
 * - else truthy `_sort_candidates`: a MaxHeap ordered by the negated
 *   comparator;
 * - otherwise: a plain array.
 *
 * `initial_state` hands out the same state array on every call, `root` reads
 * the root of the dictionary captured at build time, and
 * `default_edit_distance` is read through the builder's accessor whenever the
 * Transducer asks for it.
 *
 * @return {Transducer}
 */
Builder.prototype['build'] = function() {
  var builder = this;
  var comparator = this._comparator();
  var build_matches;

  if (isFinite(this['_maximum_candidates'])) {
    build_matches = function() {
      return new MaxHeap(comparator);
    };
  } else if (this['_sort_candidates']) {
    build_matches = function() {
      var reversed = function(a, b) {
        return -comparator(a, b);
      };
      return new MaxHeap(reversed);
    };
  } else {
    build_matches = function() {
      return [];
    };
  }

  return new Transducer({
    'minimum_distance': this._minimum_distance(),
    'build_matches': build_matches,
    'transition_for_state': this._transition_for_state(),
    'characteristic_vector': this._characteristic_vector(),
    'edges': function(dawg_node) {
      return dawg_node['edges'];
    },
    'is_final': function(dawg_node) {
      return dawg_node['is_final'];
    },
    'root': (function(dawg) {
      return function() {
        return dawg['root'];
      };
    })(this['_dictionary']),
    'initial_state': (function(initial_state) {
      return function() {
        return initial_state;
      };
    })(this._initial_state()),
    'push': this._push(comparator),
    'default_edit_distance': function() {
      return builder['default_edit_distance']();
    },
    'transform': this._transform(comparator)
  });
};
return Builder;
})();
// `transducer` is an alias of `build`.
Builder.prototype['transducer'] = Builder.prototype['build'];

// Install every default from `fields` on the prototype, so that a builder
// created without a source builder still sees a value for each option.
for (property in fields) {
  if (__hasProp.call(fields, property)) {
    Builder.prototype[property] = fields[property];
  }
}
// Fallback validator: accepts everything.
noop = function() {};

// Fallback translator: stores the value unchanged.
identity = function(x) {
  return x;
};

/**
 * Defines one combined getter/setter on Builder.prototype per property name.
 *
 * Each accessor, called with no value (or `undefined`), returns the current
 * value of the '_' + name field. Called with a value, it runs
 * `validate(value, opts, name)`, then `translate(value, opts, name)`, and
 * returns a NEW Builder copied from the receiver with the translated value
 * applied; `opts` holds any further arguments passed to the accessor.
 *
 * Names are processed in order, so accessors for the names preceding an
 * invalid entry are already defined when the error is thrown.
 *
 * @param {string|Array.<string>} properties  name(s) of the options
 * @param {Object} params  may carry a `validate` and/or `translate` function
 * @param property  scratch variable; callers do not pass it
 * @param i         scratch variable; callers do not pass it
 * @return {boolean} always true
 * @throws {Error} when `properties` is neither a string nor an Array, when
 *     `validate` / `translate` is given but is not a function, or when an
 *     entry of `properties` is not a string
 */
def_property = def_properties = function(properties, params, property, i) {
  var validate = params['validate'];
  var translate = params['translate'];
  var define_accessor;

  if (typeof properties === 'string') {
    properties = [properties];
  }
  if (!(properties instanceof Array)) {
    throw new Error('Expected "properties" to be of type Array');
  }
  if (validate !== undefined && typeof validate !== 'function') {
    throw new Error('Expected "validate" to be of type Function');
  }
  if (translate !== undefined && typeof translate !== 'function') {
    throw new Error('Expected "translate" to be of type Function');
  }
  validate = validate || noop;
  translate = translate || identity;

  define_accessor = function(name) {
    var field = '_' + name;
    Builder.prototype[name] = function() {
      var value = arguments[0];
      var opts = arguments.length >= 2 ? __slice.call(arguments, 1) : [];
      var attributes;

      if (value === undefined) {
        return this[field];
      }
      validate(value, opts, name);
      attributes = {};
      attributes[name] = translate(value, opts, name);
      return new Builder(this, attributes);
    };
  };

  for (i = 0; i < properties.length; i += 1) {
    property = properties[i];
    if (typeof property !== 'string') {
      throw new Error("Expected property at index " + i + " of properties to be of type String");
    }
    define_accessor(property);
  }
  return true;
};
// `dictionary` option: accepts a Dawg, or an Array that is converted to a
// Dawg. A second accessor argument of exactly `true` declares the array as
// already sorted.
def_property('dictionary', {
  /**
   * @throws {Error} when `dictionary` is neither an Array nor a Dawg
   */
  'validate': function(dictionary) {
    if (!(dictionary instanceof Array || dictionary instanceof Dawg)) {
      throw new Error('dictionary must be either an Array or Dawg');
    }
  },
  /**
   * @param {Array|Dawg} dictionary
   * @param {Array} _arg  extra accessor arguments; _arg[0] is the `sorted` flag
   * @return {Dawg} the given Dawg, or a new Dawg built from the array
   */
  'translate': function(dictionary, _arg) {
    var sorted = _arg[0];
    if (dictionary instanceof Array) {
      if (sorted !== true) {
        // Sort a copy: Array.prototype.sort works in place, and reordering
        // the caller's array is a side effect a setter should not have.
        dictionary = dictionary.slice().sort();
      }
      dictionary = new Dawg(dictionary);
    }
    return dictionary;
  }
});
// `algorithm` option: must be one of the three supported algorithm names.
def_property('algorithm', {
  'validate': function(algorithm) {
    var supported = ['standard', 'transposition', 'merge_and_split'];
    if (supported.indexOf(algorithm) === -1) {
      throw new Error('algorithm must be standard, transposition, or merge_and_split');
    }
  }
});

// Boolean switches.
def_properties(['sort_candidates', 'case_insensitive_sort', 'include_distance'], {
  'validate': function(value, _, property) {
    var is_boolean = typeof value === 'boolean';
    if (!is_boolean) {
      throw new Error('Expected type of "' + property + '" to be boolean');
    }
  }
});

// Numeric limits: any number that is >= 0 (Infinity included, NaN excluded).
def_properties(['maximum_candidates', 'default_edit_distance'], {
  'validate': function(value, _, property) {
    if (typeof value !== 'number' || !(0 <= value)) {
      throw new Error('Expected "' + property + '" to be a non-negative number');
    }
  }
});

// User-supplied callbacks.
def_properties(['custom_comparator', 'custom_transform'], {
  'validate': function(value, _, property) {
    var is_function = typeof value === 'function';
    if (!is_function) {
      throw new Error('Expected "' + property + '" to be a function');
    }
  }
});
// Publish the Builder constructor on the shared `levenshtein` namespace
// object resolved at the top of this file.
global['levenshtein']['Builder'] = Builder;
}).call(this);