UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

262 lines (261 loc) 10.8 kB
/**
 * Abstract Metric
 * src/metric/Metric.ts
 *
 * This module defines an abstract class for string metrics, providing a framework for
 * computing various string similarity metrics. It includes methods for running metrics
 * in different modes (single, batch, pairwise), synchronously or asynchronously, and for
 * caching results to optimize performance. The class is designed to be extended by
 * specific metric implementations like the Levenshtein distance or Jaro-Winkler similarity.
 *
 * It provides:
 *  - A base class for string metrics with common functionality
 *  - Methods for running metrics in different modes
 *  - Pre-computation for trivial cases to optimize performance
 *  - Caching of metric computations to avoid redundant calculations
 *  - Support for symmetrical metrics (same result for inputs in any order)
 *  - Performance tracking capabilities (Profiler)
 *  - Asynchronous execution support for metrics
 *
 * This class is intended to be extended by specific metric implementations that will
 * implement the `compute` method to define the specific metric computation logic.
 *
 * NOTE: this is a declaration file. It only describes the public/protected type surface;
 * the signatures of private members are elided, so their documentation below reflects the
 * author's description of the implementation rather than anything the types can enforce.
 *
 * @module Metric
 * @author Paul Köhler (komed3)
 * @license MIT
 */
import type { MetricMode, MetricInput, MetricOptions, MetricCompute, MetricRaw, MetricResult, RegistryService } from '../utils/Types';

/**
 * Abstract class representing a generic string metric.
 *
 * @abstract
 * @template R - The type of the raw result, defaulting to `MetricRaw`.
 */
export declare abstract class Metric<R = MetricRaw> {

    /**
     * Class-level (static) cache of metric computations.
     * Emptied by the static `clear()` method; its concrete type is hidden in this declaration.
     */
    private static cache;

    /**
     * Name of the metric.
     * NOTE(review): presumably the `metric` constructor argument as returned by
     * `getMetricName()` — confirm against the implementation.
     */
    private readonly metric;

    /**
     * First and second input of the metric.
     * NOTE(review): presumably derived from the constructor's `a` / `b` arguments; whether
     * they are stored as-is or normalized cannot be told from this declaration.
     */
    private readonly a;
    private readonly b;

    /**
     * Original inputs to which the results refer.
     * NOTE(review): presumably assigned through `setOriginal()` — confirm.
     */
    private origA;
    private origB;

    /** Options for the metric computation; readable by subclasses. */
    protected readonly options: MetricOptions;

    /** Whether the metric yields the same result for inputs in any order; see `isSymmetrical()`. */
    protected readonly symmetric: boolean;

    /**
     * Result of the metric computation, which can be a single result or an array of results.
     * This will be populated after running the metric.
     */
    private results;

    /**
     * Static method to clear the cache of metric computations.
     *
     * This is distinct from the instance method `clear()`, which is documented as
     * resetting the results of one metric instance.
     */
    static clear(): void;

    /**
     * Swaps two strings and their lengths if the first is longer than the second.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @returns {[string, string, number, number]} - Swapped strings and lengths
     */
    protected static swap(a: string, b: string, m: number, n: number): [ string, string, number, number ];

    /**
     * Clamps the similarity result between 0 and 1.
     *
     * @param {number} res - The input similarity to clamp
     * @returns {number} - The clamped similarity (0 to 1)
     */
    protected static clamp(res: number): number;

    /**
     * Constructor for the Metric class.
     * Initializes the metric with two inputs (strings or arrays of strings) and options.
     *
     * @param {string} metric - The name of the metric (e.g. 'levenshtein')
     * @param {MetricInput} a - First input string or array of strings
     * @param {MetricInput} b - Second input string or array of strings
     * @param {MetricOptions} [opt] - Options for the metric computation
     * @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
     * @throws {Error} - If inputs `a` or `b` are empty
     */
    constructor(metric: string, a: MetricInput, b: MetricInput, opt?: MetricOptions, symmetric?: boolean);

    /**
     * Pre-compute the metric for two strings.
     * This method is called before the actual computation to handle trivial cases.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @returns {MetricCompute<R>|undefined} - Pre-computed result or undefined if not applicable
     */
    protected preCompute(a: string, b: string, m: number, n: number): MetricCompute<R> | undefined;

    /**
     * Computation hook to be implemented by subclasses.
     * This method should contain the logic for computing the metric between two strings.
     *
     * Although it is described as abstract, this member is NOT declared with the
     * `abstract` modifier here, so the compiler does not force subclasses to override it.
     * According to the `@throws` contract below, the base version throws instead.
     *
     * @param {string} a - First string
     * @param {string} b - Second string
     * @param {number} m - Length of the first string
     * @param {number} n - Length of the second string
     * @param {number} maxLen - Maximum length of the strings
     * @returns {MetricCompute<R>} - The result of the metric computation
     * @throws {Error} - If not overridden in a subclass
     */
    protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<R>;

    /**
     * Run the metric computation for single inputs (two strings).
     * Applies preCompute for trivial cases before cache lookup and computation.
     *
     * If the profiler is active, it will measure time and memory usage.
     *
     * The signature is elided in this declaration because the member is private;
     * the tags below describe the implementation's parameters.
     *
     * @param {number} i - Pointer to the first string
     * @param {number} j - Pointer to the second string
     * @returns {MetricResultSingle<R>} - The result of the metric computation
     */
    private runSingle;

    /**
     * Run the metric computation for single inputs (two strings) asynchronously.
     *
     * @param {number} i - Pointer to the first string
     * @param {number} j - Pointer to the second string
     * @returns {Promise<MetricResultSingle<R>>} - Promise resolving the result of the metric computation
     */
    private runSingleAsync;

    /**
     * Run the metric computation for batch inputs (arrays of strings).
     *
     * It iterates through each string in the first array and computes the metric
     * against each string in the second array.
     */
    private runBatch;

    /**
     * Run the metric computation for batch inputs (arrays of strings) asynchronously.
     */
    private runBatchAsync;

    /**
     * Run the metric computation for pairwise inputs (A[i] vs B[i]).
     *
     * This method assumes that both `a` and `b` are arrays of equal length
     * and computes the metric only for corresponding index pairs.
     */
    private runPairwise;

    /**
     * Run the metric computation for pairwise inputs (A[i] vs B[i]) asynchronously.
     */
    private runPairwiseAsync;

    /**
     * Set the original inputs to which the results of the metric calculation will refer.
     *
     * @param {MetricInput} [a] - original input(s) for a
     * @param {MetricInput} [b] - original input(s) for b
     * @returns {this} - The instance type (`this`), which allows call chaining
     */
    setOriginal(a?: MetricInput, b?: MetricInput): this;

    /**
     * Check if the inputs are in batch mode.
     *
     * This method checks if either `a` or `b` contains more than one string,
     * indicating that the metric is being run in batch mode.
     *
     * @returns {boolean} - True if either input is an array with more than one element
     */
    isBatch(): boolean;

    /**
     * Check if the inputs are in single mode.
     *
     * This method checks if both `a` and `b` are single strings (not arrays),
     * indicating that the metric is being run on a single pair of strings.
     *
     * @returns {boolean} - True if both inputs are single strings
     */
    isSingle(): boolean;

    /**
     * Check if the inputs are in pairwise mode.
     *
     * This method checks if both `a` and `b` are arrays of the same length,
     * indicating that the metric is being run on corresponding pairs of strings.
     *
     * @param {boolean} [safe=false] - If true, does not throw an error if lengths are not equal
     * @returns {boolean} - True if both inputs are arrays of equal length
     * @throws {Error} - If `safe` is false and the lengths of `a` and `b` are not equal
     */
    isPairwise(safe?: boolean): boolean;

    /**
     * Check if the metric is symmetrical.
     *
     * This method returns whether the metric is symmetric, meaning it produces the same
     * result regardless of the order of inputs (e.g., Levenshtein distance).
     *
     * @returns {boolean} - True if the metric is symmetric
     */
    isSymmetrical(): boolean;

    /**
     * Determine which mode to run the metric in.
     *
     * This method checks the provided mode or defaults to the mode specified in options.
     * If no mode is specified, it defaults to 'default'.
     *
     * @param {MetricMode} [mode] - The mode to run the metric in (optional)
     * @returns {MetricMode} - The determined mode
     */
    whichMode(mode?: MetricMode): MetricMode;

    /**
     * Clear the stored results of this metric instance.
     *
     * This method resets the `results` property to `undefined`, effectively clearing
     * any previously computed results. It can be useful for re-running the metric
     * with new inputs or options.
     *
     * This is distinct from the static `Metric.clear()`, which is documented as
     * clearing the cache of metric computations.
     */
    clear(): void;

    /**
     * Run the metric computation based on the specified mode.
     *
     * Nothing is returned; results are retrieved afterwards via `getResults()`.
     *
     * @param {MetricMode} [mode] - The mode to run the metric in (optional)
     * @param {boolean} [clear=true] - Whether to clear previous results before running
     * @throws {Error} - If an unsupported mode is specified
     */
    run(mode?: MetricMode, clear?: boolean): void;

    /**
     * Run the metric computation based on the specified mode asynchronously.
     *
     * @param {MetricMode} [mode] - The mode to run the metric in (optional)
     * @param {boolean} [clear=true] - Whether to clear previous results before running
     * @returns {Promise<void>} - A promise that resolves when the metric computation is complete
     * @throws {Error} - If an unsupported mode is specified
     */
    runAsync(mode?: MetricMode, clear?: boolean): Promise<void>;

    /**
     * Get the name of the metric.
     *
     * @returns {string} - The name of the metric
     */
    getMetricName(): string;

    /**
     * Get the result of the metric computation.
     *
     * @returns {MetricResult<R>} - The result of the metric computation
     * @throws {Error} - If `run()` has not been called before this method
     */
    getResults(): MetricResult<R>;

}

/**
 * Metric registry service for managing metric implementations.
 *
 * This registry allows for dynamic registration and retrieval of metric classes,
 * enabling the use of various string similarity metrics in a consistent manner.
 */
export declare const MetricRegistry: RegistryService<Metric<MetricRaw>>;

/**
 * Type definition for a class constructor that extends the Metric class.
 *
 * This type represents a constructor function for a class that extends the Metric
 * class. It can be used to create instances of specific metric implementations,
 * such as Levenshtein or Jaro-Winkler.
 *
 * The constructor arguments are typed as `any[]`, so calls through this type are
 * not checked against the `Metric` constructor signature.
 *
 * @template R - The type of the raw result, defaulting to `MetricRaw`.
 */
export type MetricCls<R = MetricRaw> = new (...args: any[]) => Metric<R>;