UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

58 lines (57 loc) 2.13 kB
/**
 * Cosine Similarity
 * src/metric/Cosine.ts
 *
 * @see https://en.wikipedia.org/wiki/Cosine_similarity
 *
 * Cosine similarity measures how alike two vectors are independently of their
 * magnitude. For text, the inputs are represented as term frequency vectors and
 * the metric is the cosine of the angle between those vectors.
 *
 * The result lies between 0 and 1: 1 means the vectors are identical,
 * 0 means they are orthogonal (no similarity).
 *
 * @module Metric/CosineSimilarity
 * @author Paul Köhler (komed3)
 * @license MIT
 */
import type { MetricInput, MetricOptions, MetricCompute } from '../utils/Types';
import { Metric } from './Metric';

/**
 * Raw values reported alongside the Cosine similarity result.
 *
 * NOTE(review): the field meanings below are inferred from their names and the
 * module description; the implementation is not part of this declaration file.
 */
export interface CosineRaw {
    /** Presumably the dot product of the two term frequency vectors. */
    dotProduct: number;
    /** Presumably the magnitude of the vector built from the first input. */
    magnitudeA: number;
    /** Presumably the magnitude of the vector built from the second input. */
    magnitudeB: number;
}

/**
 * Cosine similarity metric, implemented as a specialization of the generic
 * Metric base class with CosineRaw as its raw result type.
 */
export declare class CosineSimilarity extends Metric<CosineRaw> {

    /**
     * Creates a Cosine similarity metric for two inputs.
     *
     * Each input may be a single string or an array of strings; the options
     * argument is optional.
     *
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     */
    constructor(a: MetricInput, b: MetricInput, opt?: MetricOptions);

    /**
     * Builds the term frequency vector of a string.
     *
     * The signature is erased in this declaration because the member is
     * private; the parameters below describe the implementation's contract.
     *
     * @param {string} str - The input string
     * @param {string} delimiter - The delimiter to split terms
     * @return {Map<string, number>} - Term frequency map
     */
    private _termFreq;

    /**
     * Computes the Cosine similarity of two strings.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @return {MetricCompute<CosineRaw>} - Object containing the similarity result and raw values
     */
    protected compute(a: string, b: string): MetricCompute<CosineRaw>;

}