UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast and well-performing package for calculating string similarity.

49 lines (48 loc) 1.88 kB
/**
 * Jaccard Index
 * src/metric/Jaccard.ts
 *
 * @see https://en.wikipedia.org/wiki/Jaccard_index
 *
 * The Jaccard Index (also known as the Jaccard similarity coefficient) expresses
 * how similar two sets are: the size of their intersection divided by the size
 * of their union. For string similarity it is commonly applied to sets of
 * characters, tokens, or n-grams. The value lies between 0 and 1, where 1 means
 * the sets are identical and 0 means they share no elements.
 *
 * @module Metric/JaccardIndex
 * @author Paul Köhler (komed3)
 * @license MIT
 */
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
import { Metric } from './Metric';

/**
 * Raw values exposed alongside the Jaccard similarity result.
 */
export interface JaccardRaw {
    /**
     * Presumably the size of the intersection (the numerator of the index);
     * the implementation is not visible from this declaration - confirm there.
     */
    intersection: number;
    /**
     * Presumably the size of the union (the denominator of the index);
     * the implementation is not visible from this declaration - confirm there.
     */
    union: number;
}

/**
 * Metric subclass that implements the Jaccard Index algorithm.
 */
export declare class JaccardIndex extends Metric<JaccardRaw> {
    /**
     * Creates a Jaccard Index metric for two inputs.
     *
     * Each input is either a single string or an array of strings;
     * the options argument may be omitted.
     *
     * @param a - First input string or array of strings
     * @param b - Second input string or array of strings
     * @param opt - Optional settings for the metric computation
     */
    constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);

    /**
     * Calculates the Jaccard Index between two strings.
     *
     * @param a - First string
     * @param b - Second string
     * @param m - Length of the first string
     * @param n - Length of the second string
     * @returns Object containing the similarity result and the raw values
     */
    protected compute(a: string, b: string, m: number, n: number): MetricCompute<JaccardRaw>;
}