cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity
75 lines (72 loc) • 2.74 kB
JavaScript
// CmpStr v3.0.1 dev-052fa0c-250614 by Paul Köhler @komed3 / MIT License
import { MetricRegistry, Metric } from './Metric.js';
import { Pool } from '../utils/Pool.js';
/**
* Jaccard Index
* src/metric/Jaccard.ts
*
* @see https://en.wikipedia.org/wiki/Jaccard_index
*
* The Jaccard Index (or Jaccard similarity coefficient) measures the similarity
* between two sets by dividing the size of their intersection by the size of
* their union. In string similarity, it is often used to compare sets of characters,
* tokens, or n-grams. The result is a value between 0 and 1, where 1 means the
* sets are identical and 0 means they have no elements in common.
*
* @module Metric/JaccardIndex
* @author Paul Köhler (komed3)
* @license MIT
*/
/**
 * Jaccard Index metric.
 *
 * Scores two strings by the ratio |A ∩ B| / |A ∪ B|, where A and B are the
 * sets of distinct characters found in each string. Built on top of the
 * shared Metric base class.
 */
class JaccardIndex extends Metric {
    /**
     * Creates a Jaccard Index metric for a pair of inputs.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // The trailing `true` tells the base class that this metric is symmetrical
        super('jaccard', a, b, opt, true);
    }

    /**
     * Computes the Jaccard Index for a single pair of strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @return {MetricCompute<JaccardRaw>} - Similarity in `res`, set sizes in `raw`
     */
    compute(a, b, m, n) {
        // Borrow one pooled set per input, requested with the string lengths
        // NOTE(review): assumes the pool hands back empty sets - confirm against Pool
        const [charsOfA, charsOfB] = Pool.acquireMany('set', [m, n]);

        // Collect the distinct characters of each string
        for (const ch of a) charsOfA.add(ch);
        for (const ch of b) charsOfB.add(ch);

        // Count the characters that occur in both sets
        let intersection = 0;
        charsOfA.forEach((ch) => {
            if (charsOfB.has(ch)) intersection += 1;
        });

        // |A ∪ B| = |A| + |B| - |A ∩ B|
        const union = charsOfA.size + charsOfB.size - intersection;

        // Hand both sets back to the pool before building the result
        Pool.release('set', charsOfA, m);
        Pool.release('set', charsOfB, n);

        // Two empty inputs produce an empty union; they count as identical
        const res = union === 0 ? 1 : Metric.clamp(intersection / union);

        return { res, raw: { intersection, union } };
    }
}
// Register the class in the metric registry under the key 'jaccard';
// this runs as a side effect of loading the module
MetricRegistry.add('jaccard', JaccardIndex);
// Expose the class itself as a named export
export { JaccardIndex };
//# sourceMappingURL=Jaccard.js.map