UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

57 lines (56 loc) 2.01 kB
/**
 * q-Gram Similarity
 * src/metric/QGram.ts
 *
 * @see https://en.wikipedia.org/wiki/Q-gram
 *
 * Q-gram similarity is a string-matching algorithm that compares two strings by
 * breaking each of them into substrings of length q (its "q-grams"). The similarity
 * is the size of the intersection of the two q-gram sets divided by the size of
 * the larger of the two sets.
 *
 * This metric is widely used in approximate string matching, information retrieval,
 * and computational linguistics.
 *
 * NOTE: this is the type-declaration view of the module; it describes the public
 * shape only and contains no implementation.
 *
 * @module Metric/QGramSimilarity
 * @author Paul Köhler (komed3)
 * @license MIT
 */
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
import { Metric } from './Metric';

/**
 * Raw (intermediate) values produced by the q-Gram similarity computation.
 *
 * This is the type parameter passed to both `Metric` and `MetricCompute`
 * by `QGramSimilarity`.
 */
export interface QGramRaw {
    /**
     * Presumably the number of q-grams shared by both inputs, i.e. the numerator
     * of the similarity ratio — TODO confirm against the implementation.
     */
    intersection: number;
    /**
     * Presumably the size of the larger of the two q-gram sets, i.e. the
     * denominator of the similarity ratio — TODO confirm against the implementation.
     */
    size: number;
}

/**
 * QGramSimilarity class extends the Metric class to implement the q-Gram
 * similarity algorithm, using `QGramRaw` as its raw result type.
 */
export declare class QGramSimilarity extends Metric<QGramRaw> {
    /**
     * Constructor for the QGramSimilarity class.
     *
     * Initializes the q-Gram similarity metric with two input strings or
     * arrays of strings and optional options.
     *
     * NOTE(review): how the q-gram length `q` is chosen is not visible in this
     * declaration; presumably it is taken from `opt` — confirm in the implementation.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
    /**
     * Converts a string into a set of q-grams (substrings of length q).
     *
     * The member is private, so this declaration lists only its name; the
     * parameters and return type below describe the implementation's signature.
     *
     * @param {string} str - The input string
     * @param {number} q - The length of each q-gram
     * @return {Set<string>} - Set of q-grams
     */
    private _qGrams;
    /**
     * Calculates the q-Gram similarity between two strings.
     *
     * Protected: not part of the public API. Presumably invoked by the `Metric`
     * base class for each pair of strings to compare — verify against `Metric`.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<QGramRaw>} - Object containing the similarity result and raw values
     */
    protected compute(a: string, b: string): MetricCompute<QGramRaw>;
}