fuzzball
Version:
Fuzzy string matching algorithms and utilities, port of the TheFuzz Python library.
78 lines (66 loc) • 2.44 kB
JavaScript
// @ts-check
// levenshtein distance with astral support
/** from https://github.com/hiddentao/fast-levenshtein slightly modified to double weight replacements as done by python-Levenshtein/fuzzywuzzy */
var collator;
try {
collator = (typeof Intl !== "undefined" && typeof Intl.Collator !== "undefined") ? Intl.Collator("generic", { sensitivity: "base" }) : null;
} catch (err) {
if (typeof console !== undefined) console.warn("Collator could not be initialized and wouldn't be used");
}
module.exports = function leven(a, b, options) {
/** from https://github.com/sindresorhus/leven slightly modified to double weight replacements as done by python-Levenshtein/fuzzywuzzy */
var arr = [];
var charCodeCache = [];
var useCollator = (options && collator && options.useCollator);
var subcost = 1;
//to match behavior of python-Levenshtein and fuzzywuzzy, set to 2 in _ratio
if (options && options.subcost && typeof options.subcost === "number") subcost = options.subcost;
if (a === b) {
return 0;
}
var achars = Array.from(a);
var bchars = Array.from(b);
var aLen = achars.length;
var bLen = bchars.length;
if (aLen === 0) {
return bLen;
}
if (bLen === 0) {
return aLen;
}
var bCharCode;
var ret;
var tmp;
var tmp2;
var i = 0;
var j = 0;
while (i < aLen) {
charCodeCache[i] = achars[i].codePointAt(0);
arr[i] = ++i;
}
if (!useCollator) { //checking for collator inside while 2x slower
while (j < bLen) {
bCharCode = bchars[j].codePointAt(0);
tmp = j++;
ret = j;
for (i = 0; i < aLen; i++) {
tmp2 = bCharCode === charCodeCache[i] ? tmp : tmp + subcost;
tmp = arr[i];
ret = arr[i] = tmp > ret ? tmp2 > ret ? ret + 1 : tmp2 : tmp2 > tmp ? tmp + 1 : tmp2;
}
}
}
else {
while (j < bLen) {
bCharCode = bchars[j].codePointAt(0);
tmp = j++;
ret = j;
for (i = 0; i < aLen; i++) {
tmp2 = 0 === collator.compare(String.fromCodePoint(bCharCode), String.fromCodePoint(charCodeCache[i])) ? tmp : tmp + subcost;
tmp = arr[i];
ret = arr[i] = tmp > ret ? tmp2 > ret ? ret + 1 : tmp2 : tmp2 > tmp ? tmp + 1 : tmp2;
}
}
}
return ret;
}