UNPKG

string-metric

Version:

Get string similarity in JavaScript or TypeScript

101 lines (100 loc) 3.59 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.NGram = void 0; var utils_1 = require("./utils/utils"); var NGram = /** @class */ (function () { function NGram(n) { this.DEFAULT_N = 2; this.n = utils_1.isNullOrUndefined(n) ? this.DEFAULT_N : n; } NGram.prototype.distance = function (s0, s1) { if (utils_1.isNullOrUndefined(s0)) { throw new Error('s0 must neither be null nor undefined'); } if (utils_1.isNullOrUndefined(s1)) { throw new Error('s1 must neither be null nor undefined'); } if (s0 === s1) { return 0; } var special = '\n'; var sl = s0.length; var tl = s1.length; if (sl == 0 || tl == 0) { return 1; } var cost = 0; if (sl < this.n || tl < this.n) { for (var i_1 = 0, ni = Math.min(sl, tl); i_1 < ni; i_1++) { if (s0.charAt(i_1) == s1.charAt(i_1)) { cost++; } } return cost / Math.max(sl, tl); } var sa = Array(sl + this.n - 1); var p; //'previous' cost array, horizontally var d; // cost array, horizontally var d2; //placeholder to assist in swapping p and d //construct sa with prefix for (var i_2 = 0; i_2 < sa.length; i_2++) { if (i_2 < this.n - 1) { sa[i_2] = special; //add prefix } else { sa[i_2] = s0.charAt(i_2 - this.n + 1); } } p = Array(sl + 1); d = Array(sl + 1); // indexes into strings s and t var i; // iterates through source var j; // iterates through target var t_j = Array(this.n); // jth n-gram of t for (i = 0; i <= sl; i++) { p[i] = i; } for (j = 1; j <= tl; j++) { //construct t_j n-gram if (j < this.n) { for (var ti = 0; ti < this.n - j; ti++) { t_j[ti] = special; //add prefix } for (var ti = this.n - j; ti < this.n; ti++) { t_j[ti] = s1.charAt(ti - (this.n - j)); } } else { t_j = s1.substring(j - this.n, j).split(''); } d[0] = j; for (i = 1; i <= sl; i++) { cost = 0; var tn = this.n; //compare sa to t_j for (var ni = 0; ni < this.n; ni++) { if (sa[i - 1 + ni] != t_j[ni]) { cost++; } else if (sa[i - 1 + ni] == special) { //discount matches on prefix tn--; } } var ec = cost / tn; // minimum of cell to the left+1, to the top+1, // diagonally left and up +cost d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + ec); } // copy current distance counts to 'previous row' distance counts d2 = p; p = d; d = d2; } // our last action in the above loop was to switch d and p, so p now // actually has the most recent cost counts return p[sl] / Math.max(tl, sl); }; return NGram; }()); exports.NGram = NGram;