UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

192 lines (188 loc) 5.62 kB
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
'use strict';

var Errors = require('../utils/Errors.cjs');
var HashTable = require('../utils/HashTable.cjs');
var Profiler = require('../utils/Profiler.cjs');
var Registry = require('../utils/Registry.cjs');

const profiler = Profiler.Profiler.getInstance();

/**
 * Serialize a value to JSON with object keys sorted at every nesting level,
 * so that two structurally equal option objects always produce the same text.
 *
 * A replacer *function* is used on purpose: an array replacer acts as a
 * property whitelist for nested objects too, which would silently drop every
 * nested key that does not also exist at the top level.
 *
 * @param {*} value - The value to serialize.
 * @returns {string|undefined} Whatever `JSON.stringify` returns for the value.
 */
const stableStringify = (value) =>
  JSON.stringify(value, (_key, val) =>
    val !== null && typeof val === 'object' && !Array.isArray(val)
      ? Object.fromEntries(
          Object.keys(val)
            .sort()
            .map((name) => [name, val[name]])
        )
      : val
  );

/**
 * Base class for string metrics.
 *
 * Holds one or more inputs on each side, runs `compute()` (supplied by a
 * subclass) for single, batch or pairwise comparisons, and stores the outcome
 * in `results`. Computed values are memoized in a cache shared by all
 * instances (`Metric.cache`).
 */
class Metric {
  /** Cache shared by every metric instance. */
  static cache = new HashTable.HashTable();

  metric;
  a;
  b;
  origA = [];
  origB = [];
  options;
  optKey;
  symmetric;
  results;

  /** Empties the shared cache. */
  static clear = () => this.cache.clear();

  /**
   * Orders a pair so that the input with the smaller length comes first.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {number} m - Length of `a`.
   * @param {number} n - Length of `b`.
   * @returns {Array} `[a, b, m, n]`, or `[b, a, n, m]` when `m > n`.
   */
  static swap = (a, b, m, n) => (m > n ? [b, a, n, m] : [a, b, m, n]);

  /**
   * Restricts a number to the closed interval [0, 1].
   *
   * @param {number} res - Value to clamp.
   * @returns {number} The clamped value.
   */
  static clamp = (res) => Math.max(0, Math.min(1, res));

  /**
   * @param {string} metric - Name of the metric; used in cache keys and results.
   * @param {*} a - First input; a non-array value is wrapped into an array.
   * @param {*} b - Second input; a non-array value is wrapped into an array.
   * @param {object} [opt={}] - Options; hashed into the cache key.
   * @param {boolean} [symmetric=false] - When true, pairs are ordered by
   *   length (see `Metric.swap`) before caching and computing.
   * @throws When either input is an empty array (raised by `ErrorUtil.assert`).
   */
  constructor(metric, a, b, opt = {}, symmetric = false) {
    this.metric = metric;
    this.a = Array.isArray(a) ? a : [a];
    this.b = Array.isArray(b) ? b : [b];

    Errors.ErrorUtil.assert(
      this.a.length > 0 && this.b.length > 0,
      `Inputs <a> and <b> must not be empty`,
      { a: this.a, b: this.b }
    );

    this.options = opt;
    // Key-order independent fingerprint of the options, nested values included.
    this.optKey = HashTable.Hasher.fastFNV1a(stableStringify(opt)).toString();
    this.symmetric = symmetric;
  }

  /**
   * Shortcut for trivial pairs, evaluated before cache lookup and compute().
   *
   * @param {string} a - First string.
   * @param {string} b - Second string.
   * @param {number} m - Length of `a`.
   * @param {number} n - Length of `b`.
   * @returns {{res: number}|undefined} `{ res: 1 }` for identical inputs,
   *   `{ res: 0 }` when either is empty or both are shorter than two
   *   characters, otherwise `undefined`.
   */
  preCompute(a, b, m, n) {
    if (a === b) return { res: 1 };
    if (m === 0 || n === 0 || (m < 2 && n < 2)) return { res: 0 };
    return undefined;
  }

  /**
   * Computes the metric for one pair. Must be overridden by subclasses.
   *
   * @param {string} a - First string.
   * @param {string} b - Second string.
   * @param {number} m - Length of `a`.
   * @param {number} n - Length of `b`.
   * @param {number} maxLen - `Math.max(m, n)`.
   * @throws {Errors.CmpStrInternalError} Always, in this base class.
   */
  compute(a, b, m, n, maxLen) {
    throw new Errors.CmpStrInternalError(
      `Method compute() must be overridden in a subclass`
    );
  }

  /**
   * Compares `this.a[i]` with `this.b[j]`.
   *
   * @param {number} i - Index into `this.a`.
   * @param {number} j - Index into `this.b`.
   * @returns {object} `{ metric, a, b, ...result }`, where `a` / `b` are the
   *   registered originals when present, otherwise the stringified inputs.
   */
  runSingle(i, j) {
    return Errors.ErrorUtil.wrap(
      () => {
        const a = String(this.a[i]);
        const b = String(this.b[j]);
        let A = a;
        let B = b;
        let m = A.length;
        let n = B.length;

        let result = this.preCompute(A, B, m, n);

        if (!result) {
          result = profiler.run(() => {
            if (this.symmetric) [A, B, m, n] = Metric.swap(A, B, m, n);

            // The falsy checks on the key imply cache.key() may decline to
            // produce one (TODO confirm against HashTable). Test the base key
            // BEFORE appending the options hash: `false + optKey` would be a
            // truthy string shared by every such pair.
            const baseKey = Metric.cache.key(
              this.metric,
              [A, B],
              this.symmetric
            );
            const key = baseKey ? baseKey + this.optKey : undefined;

            const cached = key ? Metric.cache.get(key) : undefined;
            if (cached != null) return cached;

            const res = this.compute(A, B, m, n, Math.max(m, n));
            if (key) Metric.cache.set(key, res);
            return res;
          });
        }

        return {
          metric: this.metric,
          a: this.origA[i] ?? a,
          b: this.origB[j] ?? b,
          ...result,
        };
      },
      `Failed to compute metric for inputs at indices a[${i}] and b[${j}]`,
      { i, j }
    );
  }

  /**
   * Promise-returning variant of `runSingle`.
   *
   * @param {number} i - Index into `this.a`.
   * @param {number} j - Index into `this.b`.
   * @returns {Promise<object>} Resolves with the `runSingle` result.
   */
  async runSingleAsync(i, j) {
    return Promise.resolve(this.runSingle(i, j));
  }

  /** Compares every element of `a` with every element of `b`. */
  runBatch() {
    const results = [];
    for (let i = 0; i < this.a.length; i++)
      for (let j = 0; j < this.b.length; j++)
        results.push(this.runSingle(i, j));
    this.results = results;
  }

  /** Async variant of `runBatch`; pairs are awaited one after another. */
  async runBatchAsync() {
    const results = [];
    for (let i = 0; i < this.a.length; i++)
      for (let j = 0; j < this.b.length; j++)
        results.push(await this.runSingleAsync(i, j));
    this.results = results;
  }

  /** Compares `a[i]` with `b[i]` for every index of `a`. */
  runPairwise() {
    const results = [];
    for (let i = 0; i < this.a.length; i++)
      results.push(this.runSingle(i, i));
    this.results = results;
  }

  /** Async variant of `runPairwise`; pairs are awaited one after another. */
  async runPairwiseAsync() {
    const results = [];
    for (let i = 0; i < this.a.length; i++)
      results.push(await this.runSingleAsync(i, i));
    this.results = results;
  }

  /**
   * Registers the values reported as `a` / `b` in results instead of the
   * stringified inputs. A falsy argument leaves that side untouched.
   *
   * @param {*} a - Original value(s) for the first input.
   * @param {*} b - Original value(s) for the second input.
   * @returns {this} The instance, for chaining.
   */
  setOriginal(a, b) {
    if (a) this.origA = Array.isArray(a) ? a : [a];
    if (b) this.origB = Array.isArray(b) ? b : [b];
    return this;
  }

  /** True when either side holds more than one input. */
  isBatch = () => this.a.length > 1 || this.b.length > 1;

  /** True when both sides hold exactly one input. */
  isSingle = () => !this.isBatch();

  /**
   * Checks whether the inputs can be compared pairwise: at least one side has
   * several entries and both sides have the same length.
   *
   * @param {boolean} [safe=false] - Return `false` instead of throwing.
   * @returns {boolean} `true` when pairwise mode is possible.
   * @throws {Errors.CmpStrUsageError} When not possible and `safe` is falsy.
   */
  isPairwise(safe = false) {
    if (this.isBatch() && this.a.length === this.b.length) return true;
    if (safe) return false;
    throw new Errors.CmpStrUsageError(
      `Mode <pairwise> requires arrays of equal length`,
      { a: this.a, b: this.b }
    );
  }

  /** Returns the `symmetric` flag given to the constructor. */
  isSymmetrical = () => this.symmetric;

  /** Resolves the mode: explicit argument, then `options.mode`, then 'default'. */
  whichMode = (mode) => mode ?? this.options?.mode ?? 'default';

  /** Discards stored results. */
  clear = () => (this.results = undefined);

  /**
   * Runs the metric synchronously and stores the outcome in `results`.
   *
   * @param {string} [mode] - 'default', 'batch', 'single' or 'pairwise'.
   * @param {boolean} [clear=true] - Discard previous results first.
   * @throws {Errors.CmpStrInternalError} For an unsupported mode.
   */
  run(mode, clear = true) {
    if (clear) this.clear();

    // Report the mode actually dispatched on; it may come from options.
    const resolved = this.whichMode(mode);

    switch (resolved) {
      case 'default':
        if (this.isSingle()) {
          this.results = this.runSingle(0, 0);
          break;
        }
      // falls through: 'default' with several inputs behaves like 'batch'
      case 'batch':
        this.runBatch();
        break;
      case 'single':
        this.results = this.runSingle(0, 0);
        break;
      case 'pairwise':
        if (this.isPairwise()) this.runPairwise();
        break;
      default:
        throw new Errors.CmpStrInternalError(`Unsupported mode <${resolved}>`);
    }
  }

  /**
   * Async variant of `run`.
   *
   * @param {string} [mode] - 'default', 'batch', 'single' or 'pairwise'.
   * @param {boolean} [clear=true] - Discard previous results first.
   * @returns {Promise<void>}
   * @throws {Errors.CmpStrInternalError} For an unsupported mode.
   */
  async runAsync(mode, clear = true) {
    if (clear) this.clear();

    // Report the mode actually dispatched on; it may come from options.
    const resolved = this.whichMode(mode);

    switch (resolved) {
      case 'default':
        if (this.isSingle()) {
          this.results = await this.runSingleAsync(0, 0);
          break;
        }
      // falls through: 'default' with several inputs behaves like 'batch'
      case 'batch':
        await this.runBatchAsync();
        break;
      case 'single':
        this.results = await this.runSingleAsync(0, 0);
        break;
      case 'pairwise':
        if (this.isPairwise()) await this.runPairwiseAsync();
        break;
      default:
        throw new Errors.CmpStrInternalError(
          `Unsupported async mode <${resolved}>`
        );
    }
  }

  /** Returns the metric name given to the constructor. */
  getMetricName = () => this.metric;

  /**
   * Returns the stored results.
   *
   * @returns {object|object[]} A single result object or an array of them,
   *   depending on the mode that was run.
   * @throws When no results are stored (raised by `ErrorUtil.assert`).
   */
  getResults() {
    Errors.ErrorUtil.assert(
      this.results !== undefined,
      `run() must be called before getResults()`
    );
    return this.results;
  }
}

const MetricRegistry = Registry.Registry('metric', Metric);

exports.Metric = Metric;
exports.MetricRegistry = MetricRegistry;