UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

58 lines (57 loc) 2.28 kB
/**
 * Dice-Sørensen Coefficient
 * src/metric/DiceSorensen.ts
 *
 * @see https://en.wikipedia.org/wiki/Dice-S%C3%B8rensen_coefficient
 *
 * This module implements the Dice-Sørensen coefficient, a statistic used to gauge
 * the similarity of two samples. It is commonly used in natural language processing
 * and information retrieval to compare the similarity between two sets of data,
 * such as text documents. The coefficient is defined as twice the size of the
 * intersection divided by the sum of the sizes of the two sets.
 *
 * The implementation includes methods to compute bigrams from strings and calculate
 * the coefficient based on these bigrams. It handles edge cases, such as empty
 * strings and identical strings, to ensure accurate results.
 *
 * NOTE: this file contains type declarations only; no method bodies are present
 * here, so the behavior described above cannot be verified from this file alone.
 *
 * @module Metric/DiceSorensenCoefficient
 * @author Paul Köhler (komed3)
 * @license MIT
 */
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
import { Metric } from './Metric';

/**
 * Raw values reported by the Dice-Sørensen metric alongside the similarity result.
 *
 * The field meanings below are inferred from the coefficient's definition
 * (2 * |intersection| / (|A| + |B|)) — TODO confirm against the implementation.
 */
export interface DiceRaw {
    /** Presumably the number of bigrams shared by both inputs — verify in the implementation. */
    intersection: number;
    /** Presumably the combined size of both bigram sets (the denominator) — verify in the implementation. */
    size: number;
}

/**
 * DiceSorensenCoefficient class extends the Metric class to implement the
 * Dice-Sørensen coefficient. Raw results are typed as {@link DiceRaw}.
 */
export declare class DiceSorensenCoefficient extends Metric<DiceRaw> {
    /**
     * Constructor for the DiceSorensenCoefficient class.
     *
     * Initializes the Dice-Sørensen metric with two input strings or
     * arrays of strings and optional options.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);
    /**
     * Computes the bigrams of a given string.
     *
     * Private member: its signature is not visible in this declaration, so the
     * parameter and return types below come from the documentation only.
     *
     * @param {string} str - The input string
     * @return {Set<string>} - A set of bigrams (two-character sequences) from the string
     */
    private _bigrams;
    /**
     * Calculates the Dice-Sørensen coefficient between two strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<DiceRaw>} - Object containing the similarity result and
     *                                    the raw {@link DiceRaw} values
     */
    protected compute(a: string, b: string): MetricCompute<DiceRaw>;
}