string-metric
Version:
Get string similarity in JavaScript or TypeScript
101 lines (100 loc) • 3.59 kB
JavaScript
Object.defineProperty(exports, "__esModule", { value: true });
exports.NGram = void 0;
var utils_1 = require("./utils/utils");
/**
 * N-gram based string distance.
 *
 * Computes an edit-distance style cost over n-grams of the two inputs and
 * divides it by the length of the longer input. Identical strings yield 0;
 * when exactly one input is empty the result is 1.
 */
var NGram = /** @class */ (function () {
    /**
     * @param {number} [n] Size of the n-grams. When `utils_1.isNullOrUndefined(n)`
     *     reports true, DEFAULT_N (2) is used instead.
     */
    function NGram(n) {
        this.DEFAULT_N = 2;
        this.n = utils_1.isNullOrUndefined(n) ? this.DEFAULT_N : n;
    }
    /**
     * Distance between s0 and s1.
     *
     * @param {string} s0 Source string.
     * @param {string} s1 Target string.
     * @returns {number} 0 when the strings are identical, 1 when exactly one
     *     of them is empty, otherwise the accumulated n-gram edit cost divided
     *     by the longer length.
     * @throws {Error} When s0 or s1 is rejected by `utils_1.isNullOrUndefined`.
     */
    NGram.prototype.distance = function (s0, s1) {
        if (utils_1.isNullOrUndefined(s0)) {
            throw new Error('s0 must neither be null nor undefined');
        }
        if (utils_1.isNullOrUndefined(s1)) {
            throw new Error('s1 must neither be null nor undefined');
        }
        if (s0 === s1) {
            return 0;
        }
        // Padding character placed in front of both strings so that the first
        // characters also take part in full-sized n-grams.
        var special = '\n';
        var sl = s0.length;
        var tl = s1.length;
        if (sl === 0 || tl === 0) {
            return 1;
        }
        var n = this.n;
        var k;
        if (sl < n || tl < n) {
            // At least one string is shorter than an n-gram: fall back to a
            // positional comparison. The share of matching positions is a
            // similarity, so it is inverted to stay consistent with the
            // other return paths (0 = identical, 1 = nothing in common).
            var matches = 0;
            var overlap = Math.min(sl, tl);
            for (k = 0; k < overlap; k++) {
                if (s0.charAt(k) === s1.charAt(k)) {
                    matches++;
                }
            }
            return 1 - matches / Math.max(sl, tl);
        }
        // Source characters preceded by n - 1 padding characters.
        var sa = Array(sl + n - 1);
        for (k = 0; k < sa.length; k++) {
            sa[k] = k < n - 1 ? special : s0.charAt(k - n + 1);
        }
        var p = Array(sl + 1); // 'previous' cost row
        var d = Array(sl + 1); // current cost row
        var swap; // temporary used to exchange p and d
        var i; // iterates through source
        var j; // iterates through target
        var t_j = Array(n); // jth n-gram of the target
        for (i = 0; i <= sl; i++) {
            p[i] = i;
        }
        for (j = 1; j <= tl; j++) {
            // Build the n-gram of the target that ends at character j.
            if (j < n) {
                var pad = n - j;
                for (k = 0; k < pad; k++) {
                    t_j[k] = special; // padding
                }
                for (k = pad; k < n; k++) {
                    t_j[k] = s1.charAt(k - pad);
                }
            }
            else {
                t_j = s1.substring(j - n, j).split('');
            }
            d[0] = j;
            for (i = 1; i <= sl; i++) {
                var cost = 0;
                var tn = n;
                // Compare the source n-gram starting at sa[i - 1] with t_j.
                for (k = 0; k < n; k++) {
                    if (sa[i - 1 + k] !== t_j[k]) {
                        cost++;
                    }
                    else if (sa[i - 1 + k] === special) {
                        // Matches on the padding character are discounted.
                        tn--;
                    }
                }
                // tn only reaches 0 when every position matched on the padding
                // character (possible when the inputs themselves contain it);
                // cost is then 0, and guarding avoids the 0 / 0 = NaN result.
                var ec = tn > 0 ? cost / tn : 0;
                // Minimum of: cell to the left + 1, cell above + 1,
                // cell diagonally up-left + n-gram cost.
                d[i] = Math.min(d[i - 1] + 1, p[i] + 1, p[i - 1] + ec);
            }
            // The current row becomes the previous row for the next target character.
            swap = p;
            p = d;
            d = swap;
        }
        // After the final swap p holds the most recently computed row.
        return p[sl] / Math.max(tl, sl);
    };
    return NGram;
}());
// Expose the NGram class as a named CommonJS export.
exports.NGram = NGram;
;