cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity
58 lines (57 loc) • 2.28 kB
TypeScript
/**
* Dice-Sørensen Coefficient
* src/metric/DiceSorensen.ts
*
* @see https://en.wikipedia.org/wiki/Dice-S%C3%B8rensen_coefficient
*
* This module implements the Dice-Sørensen coefficient, a statistic used to gauge
* the similarity of two samples. It is commonly used in natural language processing
* and information retrieval to compare the similarity between two sets of data,
* such as text documents. The coefficient is defined as twice the size of the
* intersection divided by the sum of the sizes of the two sets.
*
* The implementation includes methods to compute bigrams from strings and calculate
* the coefficient based on these bigrams. It handles edge cases, such as empty
* strings and identical strings, to ensure accurate results.
*
* @module Metric/DiceSorensenCoefficient
* @author Paul Köhler (komed3)
* @license MIT
*/
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
import { Metric } from './Metric';
/**
* Raw values attached to a Dice-Sørensen computation. This is the type
* argument DiceSorensenCoefficient supplies to both Metric and MetricCompute.
*/
export interface DiceRaw {
/** NOTE(review): presumably the number of bigrams common to both inputs — confirm against the implementation. */
intersection: number;
/** NOTE(review): presumably the combined size of the two bigram sets (the coefficient's denominator) — confirm against the implementation. */
size: number;
}
/**
* DiceSorensenCoefficient class extends the Metric class to implement the Dice-Sørensen coefficient.
*
* This is an ambient declaration (`declare class`): it describes the public
* and protected surface only and contains no method bodies. The raw result
* type of the metric is DiceRaw.
*/
export declare class DiceSorensenCoefficient extends Metric<DiceRaw> {
/**
* Constructor for the DiceSorensenCoefficient class.
*
* Initializes the Dice-Sørensen metric with two input strings or
* arrays of strings and optional options.
*
* @param {MetricInput} a - First input string or array of strings
* @param {MetricInput} b - Second input string or array of strings
* @param {MetricOptions} [opt] - Options for the metric computation; may be omitted
*/
constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
/**
* Computes the bigrams of a given string.
*
* Private member: the declaration carries no signature for it, so the
* parameter and return types listed here are documented by this comment
* only and are not checked by the compiler at this level.
*
* @param {string} str - The input string
* @return {Set<string>} - A set of bigrams (two-character sequences) from the string
*/
private _bigrams;
/**
* Calculates the Dice-Sørensen coefficient between two strings.
*
* Protected: not part of the public API of an instance.
* NOTE(review): presumably invoked by the Metric base class for each pair
* of inputs — confirm against Metric.
*
* @param {string} a - First string
* @param {string} b - Second string
* @return {MetricCompute<DiceRaw>} - Object containing the similarity result and the raw DiceRaw values (intersection, size)
*/
protected compute(a: string, b: string): MetricCompute<DiceRaw>;
}