cmpstr
Version:
CmpStr is a lightweight, fast and well performing package for calculating string similarity
90 lines (87 loc) • 3.07 kB
JavaScript
// CmpStr v3.0.1 dev-052fa0c-250614 by Paul Köhler @komed3 / MIT License
import { MetricRegistry, Metric } from './Metric.js';
import { Pool } from '../utils/Pool.js';
/**
* q-Gram Similarity
* src/metric/QGram.ts
*
* @see https://en.wikipedia.org/wiki/Q-gram
*
* Q-gram similarity is a string-matching algorithm that compares two strings by
* breaking them into substrings (q-grams) of length Q. The similarity is computed
* as the size of the intersection of q-gram sets divided by the size of the larger
* set.
*
* This metric is widely used in approximate string matching, information retrieval,
* and computational linguistics.
*
* @module Metric/QGramSimilarity
* @author Paul Köhler (komed3)
* @license MIT
*/
/**
* QGramSimilarity class extends the Metric class to implement the q-Gram similarity algorithm.
*/
class QGramSimilarity extends Metric {
    /**
     * Constructor for the QGramSimilarity class.
     *
     * Forwards the metric name, both inputs and the options to the parent
     * Metric constructor.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a, b, opt = {}) {
        // The trailing `true` marks the metric as symmetrical.
        // NOTE(review): the name passed here ('qgram') differs in case from the
        // registry key 'qGram' used at registration — confirm this is intentional.
        super('qgram', a, b, opt, true);
    }

    /**
     * Normalizes the `q` option to a usable gram length.
     *
     * Numeric strings are coerced and fractional values are truncated. Anything
     * that does not yield a finite integer >= 1 (undefined, null, NaN, zero,
     * negative numbers) falls back to the default of 2. Without this, q <= 0
     * turns every gram into the empty string (so unrelated strings score 1),
     * and a string q turns `i + q` into a string concatenation.
     *
     * @param {*} q - Raw value of the `q` option
     * @return {number} - Positive integer gram length
     */
    _resolveQ(q) {
        const length = Math.trunc(Number(q));
        return Number.isFinite(length) && length >= 1 ? length : 2;
    }

    /**
     * Converts a string into a set of q-grams (substrings of length q).
     *
     * Grams are taken over UTF-16 code units, as `length` and `slice` operate
     * on those. A string shorter than q yields an empty set.
     *
     * @param {string} str - The input string
     * @param {number} q - The length of each q-gram (expected: positive integer)
     * @return {Set<string>} - Set of q-grams
     */
    _qGrams(str, q) {
        // Number of window positions; zero when the string is shorter than q
        const len = Math.max(0, str.length - q + 1);
        const grams = Pool.acquire('set', len);
        for (let i = 0; i < len; i++) {
            grams.add(str.slice(i, i + q));
        }
        return grams;
    }

    /**
     * Calculates the q-Gram similarity between two strings.
     *
     * The result is the number of distinct q-grams shared by both strings
     * divided by the size of the larger q-gram set. Two strings that both
     * produce no q-grams are treated as identical (similarity 1).
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<QGramRaw>} - Object containing the similarity result and raw values
     */
    compute(a, b) {
        // Read q from the options and sanitize it (default: 2)
        const { q: rawQ } = this.options;
        const q = this._resolveQ(rawQ);

        // Generate q-gram sets for both strings
        const setA = this._qGrams(a, q);
        const setB = this._qGrams(b, q);
        const sizeA = setA.size;
        const sizeB = setB.size;

        // Intersection is symmetric, so iterate the smaller set and probe the larger
        const [smaller, larger] = sizeA <= sizeB ? [setA, setB] : [setB, setA];
        let intersection = 0;
        for (const gram of smaller) {
            if (larger.has(gram)) intersection++;
        }

        // The larger set is the denominator of the similarity
        const size = Math.max(sizeA, sizeB);

        // Release sets back to the pool; they must not be used past this point
        Pool.release('set', setA, sizeA);
        Pool.release('set', setB, sizeB);

        // Return the result as a MetricCompute object
        return {
            res: size === 0 ? 1 : Metric.clamp(intersection / size),
            raw: { intersection, size }
        };
    }
}
// Register the q-Gram similarity in the metric registry under the key 'qGram'
MetricRegistry.add('qGram', QGramSimilarity);
// Expose the class as a named export of this module
export { QGramSimilarity };
//# sourceMappingURL=qGram.js.map