UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

46 lines (42 loc) 1.43 kB
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
'use strict';

const Pool = require('../utils/Pool.cjs');
const Metric = require('./Metric.cjs');

/**
 * Cosine similarity between two strings, computed over term-frequency
 * vectors. Each string is split on the `delimiter` option (a single space
 * when the option is absent) and every resulting token is one dimension
 * of the vector.
 *
 * The class registers itself with `Metric.MetricRegistry` under the
 * name `'cosine'` when this module is loaded.
 */
class CosineSimilarity extends Metric.Metric {
  /**
   * @param {string} a - First input, forwarded to the Metric base class.
   * @param {string} b - Second input, forwarded to the Metric base class.
   * @param {object} [opt={}] - Metric options; `compute` reads `delimiter`.
   */
  constructor(a, b, opt = {}) {
    // NOTE(review): the trailing `true` is interpreted by Metric.Metric —
    // presumably it flags the metric as symmetric; confirm against Metric.cjs.
    super('cosine', a, b, opt, true);
  }

  /**
   * Counts how often each token occurs in `str`.
   *
   * Tokens are taken verbatim from `String.prototype.split`, so consecutive
   * delimiters yield an empty-string token that is counted like any other.
   *
   * @param {string} str - Text to tokenize.
   * @param {string} delimiter - Separator handed to `split`.
   * @returns {Map<string, number>} Pooled map of token -> occurrence count.
   *   The caller is responsible for handing it back via `Pool.release`.
   */
  _termFreq(str, delimiter) {
    const terms = str.split(delimiter);
    const freq = Pool.Pool.acquire('map', terms.length);

    for (const term of terms) {
      freq.set(term, (freq.get(term) || 0) + 1);
    }

    return freq;
  }

  /**
   * Computes the cosine similarity of `a` and `b`.
   *
   * @param {string} a - First string.
   * @param {string} b - Second string.
   * @returns {{res: number, raw: {dotProduct: number, magnitudeA: number, magnitudeB: number}}}
   *   `res` is 0 when either vector has zero magnitude; otherwise it is
   *   `dotProduct / (magnitudeA * magnitudeB)` passed through `Metric.clamp`.
   *   `raw` exposes the intermediate values.
   */
  compute(a, b) {
    const { delimiter = ' ' } = this.options;

    // Both maps are acquired inside the try block so that a failure while
    // building the second one still returns the first one to the pool.
    let termsA;
    let termsB;

    try {
      termsA = this._termFreq(a, delimiter);
      termsB = this._termFreq(b, delimiter);

      let dotP = 0;
      let magA = 0;
      let magB = 0;

      // One pass over A yields both the dot product and |A|^2; a term that
      // is missing from B contributes nothing to the dot product.
      for (const [term, freqA] of termsA) {
        const freqB = termsB.get(term) || 0;
        dotP += freqA * freqB;
        magA += freqA * freqA;
      }

      for (const freqB of termsB.values()) {
        magB += freqB * freqB;
      }

      magA = Math.sqrt(magA);
      magB = Math.sqrt(magB);

      return {
        // Guard against division by zero when a vector is empty.
        res: magA && magB ? Metric.Metric.clamp(dotP / (magA * magB)) : 0,
        raw: { dotProduct: dotP, magnitudeA: magA, magnitudeB: magB }
      };
    } finally {
      // Release only what was actually acquired.
      if (termsA) Pool.Pool.release('map', termsA, termsA.size);
      if (termsB) Pool.Pool.release('map', termsB, termsB.size);
    }
  }
}

Metric.MetricRegistry.add('cosine', CosineSimilarity);

exports.CosineSimilarity = CosineSimilarity;