cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.
46 lines (42 loc) • 1.43 kB
JavaScript
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
;
var Pool = require('../utils/Pool.cjs');
var Metric = require('./Metric.cjs');
/**
 * Cosine similarity metric.
 *
 * Both inputs are split into terms on a delimiter and counted into
 * term-frequency maps. The score is the dot product of the two frequency
 * vectors divided by the product of their Euclidean magnitudes.
 */
class CosineSimilarity extends Metric.Metric {
  /**
   * @param {*} a - First input; handed unchanged to the Metric base constructor.
   * @param {*} b - Second input; handed unchanged to the Metric base constructor.
   * @param {object} [opt={}] - Options object handed to the base constructor.
   *   `compute` later reads `delimiter` from `this.options`
   *   (presumably populated from `opt` by the base class — confirm in Metric.cjs).
   */
  constructor(a, b, opt = {}) {
    // NOTE(review): the trailing `true` is a flag consumed by the Metric base
    // constructor; its meaning is not visible in this file.
    super('cosine', a, b, opt, true);
  }

  /**
   * Count how often each term occurs in a string.
   *
   * @param {string} str - Text to tokenize; split with `str.split(delimiter)`.
   * @param {string} delimiter - Separator passed to `split`.
   * @returns {Map} Term -> occurrence count. The map is obtained from the
   *   shared Pool, so the caller is responsible for releasing it.
   */
  _termFreq(str, delimiter) {
    const tokens = str.split(delimiter);
    // The token count is passed to the pool alongside the 'map' type.
    const counts = Pool.Pool.acquire('map', tokens.length);
    for (const token of tokens) {
      const seen = counts.get(token);
      counts.set(token, seen ? seen + 1 : 1);
    }
    return counts;
  }

  /**
   * Compute the cosine similarity of two strings.
   *
   * @param {string} a - First string.
   * @param {string} b - Second string.
   * @returns {{res: number, raw: {dotProduct: number, magnitudeA: number, magnitudeB: number}}}
   *   `res` is the clamped similarity, or 0 when either magnitude is falsy;
   *   `raw` carries the intermediate values.
   */
  compute(a, b) {
    // A single space is used unless a delimiter is set in the options.
    let separator = this.options.delimiter;
    if (separator === undefined) {
      separator = ' ';
    }

    const freqA = this._termFreq(a, separator);
    const freqB = this._termFreq(b, separator);

    try {
      let dot = 0;
      let sumSqA = 0;
      let sumSqB = 0;

      // One pass over A yields both the dot product and A's squared length;
      // terms missing from B contribute zero to the dot product.
      freqA.forEach((countA, term) => {
        const countB = freqB.get(term) || 0;
        dot += countA * countB;
        sumSqA += countA * countA;
      });

      for (const countB of freqB.values()) {
        sumSqB += countB * countB;
      }

      const normA = Math.sqrt(sumSqA);
      const normB = Math.sqrt(sumSqB);

      // Guard against dividing by a zero magnitude.
      let similarity = 0;
      if (normA && normB) {
        similarity = Metric.Metric.clamp(dot / (normA * normB));
      }

      return {
        res: similarity,
        raw: { dotProduct: dot, magnitudeA: normA, magnitudeB: normB },
      };
    } finally {
      // Hand both frequency maps back to the pool whether or not the
      // computation succeeded.
      Pool.Pool.release('map', freqA, freqA.size);
      Pool.Pool.release('map', freqB, freqB.size);
    }
  }
}
// Add the class to MetricRegistry under the name 'cosine' — the same
// identifier the constructor passes to the Metric base class.
Metric.MetricRegistry.add('cosine', CosineSimilarity);
// Expose the class on this CommonJS module's exports object.
exports.CosineSimilarity = CosineSimilarity;