cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity
38 lines (35 loc) • 1.12 kB
JavaScript
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
import { Pool } from '../utils/Pool.mjs';
import { MetricRegistry, Metric } from './Metric.mjs';
/**
 * Q-gram similarity metric.
 *
 * Each input is reduced to the set of its distinct substrings of length `q`
 * (cut with `slice`, i.e. on UTF-16 code units for strings). The score is
 * `|A ∩ B| / max(|A|, |B|)`.
 */
class QGramSimilarity extends Metric {
    /**
     * @param {*} a - First input, passed through unchanged to the Metric base constructor.
     * @param {*} b - Second input, passed through unchanged to the Metric base constructor.
     * @param {Object} [opt={}] - Options forwarded to the base constructor. `compute`
     *   reads `q` (gram length, default 2) from `this.options` — presumably the base
     *   class stores `opt` there; confirm in Metric.mjs.
     */
    constructor(a, b, opt = {}) {
        // NOTE(review): the trailing `true` is presumably Metric's "symmetric" flag — confirm in Metric.mjs.
        super('qGram', a, b, opt, true);
    }

    /**
     * Collects the distinct q-grams of `str` into a set obtained from the Pool.
     *
     * The caller owns the returned set and is responsible for releasing it.
     * Assumes `Pool.acquire('set', …)` hands out an empty Set — TODO confirm in Pool.mjs.
     *
     * @param {string} str - Input to cut into q-grams.
     * @param {number} q - Gram length.
     * @returns {Set<string>} Distinct q-grams; empty when `str` is shorter than `q`.
     */
    _qGrams(str, q) {
        // Number of gram start positions; zero when the input is shorter than q.
        const len = Math.max(0, str.length - q + 1);
        const grams = Pool.acquire('set', len);
        for (let i = 0; i < len; i++) grams.add(str.slice(i, i + q));
        return grams;
    }

    /**
     * Computes the q-gram similarity of `a` and `b`.
     *
     * @param {string} a - First string.
     * @param {string} b - Second string.
     * @returns {{ res: number, raw: { intersection: number, size: number } }}
     *   `res` is the similarity score; `raw.intersection` is the number of shared
     *   distinct q-grams and `raw.size` the larger of the two gram-set sizes.
     */
    compute(a, b) {
        const { q = 2 } = this.options;
        const setA = this._qGrams(a, q);
        const setB = this._qGrams(b, q);
        const sizeA = setA.size;
        const sizeB = setB.size;
        try {
            let intersection = 0;
            for (const gram of setA) if (setB.has(gram)) intersection++;
            const size = Math.max(sizeA, sizeB);

            let res;
            if (size === 0) {
                // Neither input is long enough to yield a q-gram, so the gram sets
                // carry no information. Only report a perfect match when the inputs
                // really are equal; otherwise e.g. "a" vs "b" (q = 2) would score 1.
                res = a === b ? 1 : 0;
            } else {
                res = Metric.clamp(intersection / size);
            }

            return {
                res,
                raw: { intersection, size }
            };
        } finally {
            // Always hand both sets back to the pool, even if scoring threw.
            Pool.release('set', setA, sizeA);
            Pool.release('set', setB, sizeB);
        }
    }
}
// Register the class with MetricRegistry under the key 'qGram' (the same name the
// constructor passes to the Metric base class). This runs as a side effect of
// importing this module.
MetricRegistry.add('qGram', QGramSimilarity);
// Expose the class as a named export of this module.
export { QGramSimilarity };