UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

75 lines (72 loc) 2.74 kB
// CmpStr v3.0.1 dev-052fa0c-250614 by Paul Köhler @komed3 / MIT License
import { MetricRegistry, Metric } from './Metric.js';
import { Pool } from '../utils/Pool.js';

/**
 * Jaccard Index
 * src/metric/Jaccard.ts
 *
 * @see https://en.wikipedia.org/wiki/Jaccard_index
 *
 * The Jaccard Index (also called the Jaccard similarity coefficient) expresses
 * how alike two sets are: the number of elements they share divided by the
 * number of distinct elements across both. Applied to strings, the sets are
 * typically built from characters, tokens, or n-grams. The score lies between
 * 0 and 1; 1 means both sets hold the same elements, 0 means they share none.
 *
 * @module Metric/JaccardIndex
 * @author Paul Köhler (komed3)
 * @license MIT
 */

/**
 * Metric implementation of the Jaccard Index, built on top of the Metric base class.
 */
class JaccardIndex extends Metric {

    /**
     * Creates a new Jaccard Index metric.
     *
     * Forwards the two inputs and the options to the Metric base class under
     * the metric name 'jaccard'.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // The trailing `true` flags the metric as symmetrical
        super('jaccard', a, b, opt, true);
    }

    /**
     * Computes the Jaccard Index for a single pair of inputs.
     *
     * Both inputs are iterated element by element and reduced to sets of
     * unique elements; the score is |A ∩ B| / |A ∪ B|. When the union is
     * empty, the score is 1.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string (forwarded to the Pool)
     * @param {number} n - Length of the second string (forwarded to the Pool)
     * @return {MetricCompute<JaccardRaw>} - Similarity result plus the raw
     *                                       intersection and union sizes
     */
    compute(a, b, m, n) {
        // Borrow one set per input from the Pool
        const [first, second] = Pool.acquireMany('set', [m, n]);

        // Collect the unique elements of each input
        for (const element of a) {
            first.add(element);
        }
        for (const element of b) {
            second.add(element);
        }

        // Count the elements that occur in both sets
        let intersection = 0;
        for (const element of first) {
            if (second.has(element)) {
                intersection++;
            }
        }

        // |A ∪ B| = |A| + |B| - |A ∩ B|
        const union = first.size + second.size - intersection;

        // Hand the sets back to the Pool before building the result
        Pool.release('set', first, m);
        Pool.release('set', second, n);

        // An empty union yields a score of 1; otherwise clamp the ratio
        const res = union === 0 ? 1 : Metric.clamp(intersection / union);

        return { res, raw: { intersection, union } };
    }

}

// Make the metric available in the registry under the name 'jaccard'
MetricRegistry.add('jaccard', JaccardIndex);

export { JaccardIndex };
//# sourceMappingURL=Jaccard.js.map