cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity
192 lines (188 loc) • 5.62 kB
JavaScript
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
'use strict';
var Errors = require('../utils/Errors.cjs');
var HashTable = require('../utils/HashTable.cjs');
var Profiler = require('../utils/Profiler.cjs');
var Registry = require('../utils/Registry.cjs');
// Profiler instance obtained via Profiler.getInstance(). Metric#runSingle() wraps
// the cache lookup and the compute() call in profiler.run().
const profiler = Profiler.Profiler.getInstance();
/**
 * Base class for string metrics.
 *
 * Holds one or more inputs for each side (`a` and `b`), runs the comparison in
 * single, batch or pairwise mode and stores the outcome in `results`.
 * Subclasses must override `compute()`; results of `compute()` are cached in a
 * class-level cache shared by all instances.
 */
class Metric {
  /** Cache for computed results, shared by every Metric instance. */
  static cache = new HashTable.HashTable();
  metric;
  a;
  b;
  origA = [];
  origB = [];
  options;
  optKey;
  symmetric;
  results;

  /** Empties the shared cache (`this` is the class inside a static initializer). */
  static clear = () => this.cache.clear();

  /** Orders the pair so that the shorter input (by the given lengths) comes first. */
  static swap = (a, b, m, n) => (m > n ? [b, a, n, m] : [a, b, m, n]);

  /** Restricts a value to the closed interval [0, 1]. */
  static clamp = (res) => Math.max(0, Math.min(1, res));

  /**
   * @param {string} metric - Name of the metric; part of the cache key and of every result.
   * @param {*} a - First input or array of inputs (non-arrays are wrapped into an array).
   * @param {*} b - Second input or array of inputs (non-arrays are wrapped into an array).
   * @param {object} [opt={}] - Metric options; `opt.mode` is the fallback run mode.
   * @param {boolean} [symmetric=false] - Whether the metric yields the same result for (a, b) and (b, a).
   * @throws If `a` or `b` is an empty array (raised through `ErrorUtil.assert`).
   */
  constructor(metric, a, b, opt = {}, symmetric = false) {
    this.metric = metric;
    this.a = Array.isArray(a) ? a : [a];
    this.b = Array.isArray(b) ? b : [b];
    Errors.ErrorUtil.assert(
      this.a.length > 0 && this.b.length > 0,
      `Inputs <a> and <b> must not be empty`,
      { a: this.a, b: this.b }
    );
    this.options = opt;

    // Serialize the options with keys sorted at EVERY nesting level. An array
    // replacer (the previous approach) acts as a key allowlist for nested
    // objects too, so nested option values were silently dropped and different
    // option sets could end up with the same cache key.
    const sortKeys = (_key, value) =>
      value !== null && typeof value === 'object' && !Array.isArray(value)
        ? Object.fromEntries(
            Object.keys(value)
              .sort()
              .map((k) => [k, value[k]])
          )
        : value;
    this.optKey = HashTable.Hasher.fastFNV1a(
      JSON.stringify(opt, sortKeys)
    ).toString();
    this.symmetric = symmetric;
  }

  /**
   * Cheap shortcut evaluated before the actual computation.
   *
   * @param {string} a - First string.
   * @param {string} b - Second string.
   * @param {number} m - Length of `a`.
   * @param {number} n - Length of `b`.
   * @returns {{res: number}|undefined} `{ res: 1 }` for identical strings,
   *   `{ res: 0 }` if either string is empty or both are shorter than two
   *   characters, otherwise `undefined` (no shortcut applies).
   */
  preCompute(a, b, m, n) {
    if (a === b) return { res: 1 };
    if (m === 0 || n === 0 || (m < 2 && n < 2)) return { res: 0 };
    return undefined;
  }

  /**
   * Computes the metric for one pair of strings. Must be overridden.
   *
   * @param {string} a - First string.
   * @param {string} b - Second string.
   * @param {number} m - Length of `a`.
   * @param {number} n - Length of `b`.
   * @param {number} maxLen - The larger of `m` and `n`.
   * @throws {CmpStrInternalError} Always, in this base implementation.
   */
  compute(a, b, m, n, maxLen) {
    throw new Errors.CmpStrInternalError(
      `Method compute() must be overridden in a subclass`
    );
  }

  /**
   * Compares `this.a[i]` with `this.b[j]`.
   *
   * @param {number} i - Index into `this.a`.
   * @param {number} j - Index into `this.b`.
   * @returns {object} The metric name, both inputs (the originals registered
   *   via `setOriginal()` when present) and the fields of the computed result.
   */
  runSingle(i, j) {
    return Errors.ErrorUtil.wrap(
      () => {
        const a = String(this.a[i]);
        const b = String(this.b[j]);
        // Working copies: these may be swapped for symmetric metrics, while
        // `a` and `b` keep the caller's order for the returned result.
        let A = a;
        let B = b;
        let m = A.length;
        let n = B.length;
        let result = this.preCompute(A, B, m, n);
        if (!result) {
          result = profiler.run(() => {
            if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);
            // Only append the options hash to a usable base key. Concatenating
            // first would turn a falsy key into a truthy string (e.g.
            // "false123"), defeating the guards below and letting unrelated
            // inputs share one cache slot.
            const baseKey = Metric.cache.key(
              this.metric,
              [A, B],
              this.symmetric
            );
            const key = baseKey ? baseKey + this.optKey : undefined;
            const cached = key ? Metric.cache.get(key) : undefined;
            if (cached !== undefined && cached !== null) return cached;
            const res = this.compute(A, B, m, n, Math.max(m, n));
            if (key) Metric.cache.set(key, res);
            return res;
          });
        }
        return {
          metric: this.metric,
          a: this.origA[i] ?? a,
          b: this.origB[j] ?? b,
          ...result
        };
      },
      `Failed to compute metric for inputs at indices a[${i}] and b[${j}]`,
      { i, j }
    );
  }

  /** Promise-returning variant of `runSingle()`. */
  async runSingleAsync(i, j) {
    return Promise.resolve(this.runSingle(i, j));
  }

  /** Compares every element of `a` with every element of `b` and stores the list in `results`. */
  runBatch() {
    const results = [];
    for (let i = 0; i < this.a.length; i++) {
      for (let j = 0; j < this.b.length; j++) {
        results.push(this.runSingle(i, j));
      }
    }
    this.results = results;
  }

  /** Async variant of `runBatch()`; pairs are awaited one after another. */
  async runBatchAsync() {
    const results = [];
    for (let i = 0; i < this.a.length; i++) {
      for (let j = 0; j < this.b.length; j++) {
        results.push(await this.runSingleAsync(i, j));
      }
    }
    this.results = results;
  }

  /** Compares `a[i]` with `b[i]` for every index of `a` and stores the list in `results`. */
  runPairwise() {
    const results = [];
    for (let i = 0; i < this.a.length; i++) {
      results.push(this.runSingle(i, i));
    }
    this.results = results;
  }

  /** Async variant of `runPairwise()`; pairs are awaited one after another. */
  async runPairwiseAsync() {
    const results = [];
    for (let i = 0; i < this.a.length; i++) {
      results.push(await this.runSingleAsync(i, i));
    }
    this.results = results;
  }

  /**
   * Registers the original inputs that should be reported in results instead
   * of the strings actually compared.
   *
   * Falsy arguments (including an empty string) are ignored and leave the
   * corresponding list untouched.
   *
   * @param {*} [a] - Original first input or array of inputs.
   * @param {*} [b] - Original second input or array of inputs.
   * @returns {this} The instance, for chaining.
   */
  setOriginal(a, b) {
    if (a) this.origA = Array.isArray(a) ? a : [a];
    if (b) this.origB = Array.isArray(b) ? b : [b];
    return this;
  }

  /** True if at least one side holds more than one input. */
  isBatch = () => this.a.length > 1 || this.b.length > 1;

  /** True if both sides hold exactly one input. */
  isSingle = () => !this.isBatch();

  /**
   * Checks whether the inputs can be compared pairwise, i.e. this is a batch
   * and both sides have the same number of inputs.
   *
   * @param {boolean} [safe=false] - Return `false` instead of throwing.
   * @returns {boolean}
   * @throws {CmpStrUsageError} If the check fails and `safe` is false.
   */
  isPairwise(safe = false) {
    if (this.isBatch() && this.a.length === this.b.length) return true;
    if (safe) return false;
    throw new Errors.CmpStrUsageError(
      `Mode <pairwise> requires arrays of equal length`,
      { a: this.a, b: this.b }
    );
  }

  /** Returns the `symmetric` flag passed to the constructor. */
  isSymmetrical = () => this.symmetric;

  /** Resolves the run mode: explicit argument, then `options.mode`, then 'default'. */
  whichMode = (mode) => mode ?? this.options?.mode ?? 'default';

  /** Discards previously stored results. */
  clear = () => (this.results = undefined);

  /**
   * Runs the metric synchronously and stores the outcome in `results`.
   *
   * @param {string} [mode] - 'default', 'batch', 'single' or 'pairwise';
   *   resolved through `whichMode()` when omitted.
   * @param {boolean} [clear=true] - Discard earlier results first.
   * @throws {CmpStrInternalError} If the resolved mode is not supported.
   */
  run(mode, clear = true) {
    if (clear) this.clear();
    const resolved = this.whichMode(mode);
    switch (resolved) {
      case 'default':
        if (this.isSingle()) {
          this.results = this.runSingle(0, 0);
          break;
        }
      // falls through: 'default' with several inputs behaves like 'batch'
      case 'batch':
        this.runBatch();
        break;
      case 'single':
        this.results = this.runSingle(0, 0);
        break;
      case 'pairwise':
        if (this.isPairwise()) this.runPairwise();
        break;
      default:
        // Report the resolved mode so an invalid `options.mode` is named
        // instead of printing <undefined>.
        throw new Errors.CmpStrInternalError(`Unsupported mode <${resolved}>`);
    }
  }

  /**
   * Async variant of `run()`.
   *
   * @param {string} [mode] - 'default', 'batch', 'single' or 'pairwise';
   *   resolved through `whichMode()` when omitted.
   * @param {boolean} [clear=true] - Discard earlier results first.
   * @throws {CmpStrInternalError} If the resolved mode is not supported.
   */
  async runAsync(mode, clear = true) {
    if (clear) this.clear();
    const resolved = this.whichMode(mode);
    switch (resolved) {
      case 'default':
        if (this.isSingle()) {
          this.results = await this.runSingleAsync(0, 0);
          break;
        }
      // falls through: 'default' with several inputs behaves like 'batch'
      case 'batch':
        await this.runBatchAsync();
        break;
      case 'single':
        this.results = await this.runSingleAsync(0, 0);
        break;
      case 'pairwise':
        if (this.isPairwise()) await this.runPairwiseAsync();
        break;
      default:
        throw new Errors.CmpStrInternalError(
          `Unsupported async mode <${resolved}>`
        );
    }
  }

  /** Returns the metric name passed to the constructor. */
  getMetricName = () => this.metric;

  /**
   * Returns the stored results.
   *
   * @returns {object|object[]} A single result or a list of results, depending on the mode that ran.
   * @throws If no results are stored yet (raised through `ErrorUtil.assert`).
   */
  getResults() {
    Errors.ErrorUtil.assert(
      this.results !== undefined,
      `run() must be called before getResults()`
    );
    return this.results;
  }
}
// Registry created with the 'metric' label and the Metric base class.
// NOTE(review): presumably used to register and look up Metric subclasses by
// name — confirm against utils/Registry.
const MetricRegistry = Registry.Registry('metric', Metric);
// CommonJS exports of this module.
exports.Metric = Metric;
exports.MetricRegistry = MetricRegistry;