cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.
262 lines (261 loc) • 10.8 kB
TypeScript
/**
* Abstract Metric
* src/metric/Metric.ts
*
* This module defines an abstract class for string metrics, providing a framework for
* computing various string similarity metrics. It includes methods for running metrics
* in different modes (single, batch, pairwise), either synchronously or asynchronously,
* and for caching results to optimize performance. The class is designed to be extended
* by specific metric implementations like the Levenshtein distance or Jaro-Winkler similarity.
*
* It provides:
* - A base class for string metrics with common functionality
* - Methods for running metrics in different modes
* - Pre-computation for trivial cases to optimize performance
* - Caching of metric computations to avoid redundant calculations
* - Support for symmetrical metrics (same result for inputs in any order)
* - Performance tracking capabilities (Profiler)
* - Asynchronous execution support for metrics
*
* This class is intended to be extended by specific metric implementations that will
* implement the `compute` method to define the specific metric computation logic.
*
* @module Metric
* @author Paul Köhler (komed3)
* @license MIT
*/
import type { MetricMode, MetricInput, MetricOptions, MetricCompute, MetricRaw, MetricResult, RegistryService } from '../utils/Types';
/**
* Abstract base class representing a generic string metric.
*
* Concrete metrics extend this class and supply the `compute` logic. The base class
* declares the run modes (single, batch, pairwise; each synchronous and asynchronous),
* a class-level cache that can be emptied via `Metric.clear()`, and accessors for the
* computed results.
*
* NOTE: this is a declaration only (`declare class`). Private members are listed
* without types and no implementation is visible here, so the notes on private
* members below are hedged where the declaration alone cannot confirm them.
*
* @abstract
* @template R - The type of the raw result, defaulting to `MetricRaw`.
*/
export declare abstract class Metric<R = MetricRaw> {
/**
* Class-level (static) cache of metric computations.
* Emptied by the static `Metric.clear()`; not to be confused with the per-instance results.
*/
private static cache;
/** Name of the metric — presumably the `metric` constructor argument returned by `getMetricName()`; confirm in implementation. */
private readonly metric;
/** First input — presumably derived from the constructor argument `a`; confirm in implementation. */
private readonly a;
/** Second input — presumably derived from the constructor argument `b`; confirm in implementation. */
private readonly b;
/** Original input(s) for `a` — presumably assigned via `setOriginal()`; not readonly, so it may change after construction. */
private origA;
/** Original input(s) for `b` — presumably assigned via `setOriginal()`; not readonly, so it may change after construction. */
private origB;
/** Options for the metric computation; accessible to subclasses. */
protected readonly options: MetricOptions;
/** Whether the metric is symmetric (same result for inputs in any order); accessible to subclasses. */
protected readonly symmetric: boolean;
/**
* Result of the metric computation, which can be a single result or an array of results.
* This will be populated after running the metric and is reset by the instance `clear()`.
*/
private results;
/**
* Static method to clear the cache of metric computations.
*
* This affects the class-level cache only; use the instance method `clear()` to
* reset the results of a particular metric instance.
*/
static clear(): void;
/**
* Swaps two strings and their lengths if the first is longer than the second.
*
* Per this contract, the first returned string is never longer than the second.
*
* @param {string} a - First string
* @param {string} b - Second string
* @param {number} m - Length of the first string
* @param {number} n - Length of the second string
* @returns {[string, string, number, number]} - Tuple `[a, b, m, n]`, swapped if `a` was longer than `b`
*/
protected static swap(a: string, b: string, m: number, n: number): [
string,
string,
number,
number
];
/**
* Clamps the similarity result between 0 and 1.
*
* @param {number} res - The input similarity to clamp
* @returns {number} - The clamped similarity (0 to 1)
*/
protected static clamp(res: number): number;
/**
* Constructor for the Metric class.
* Initializes the metric with two inputs (strings or arrays of strings) and options.
*
* @param {string} metric - The name of the metric (e.g. 'levenshtein')
* @param {MetricInput} a - First input string or array of strings
* @param {MetricInput} b - Second input string or array of strings
* @param {MetricOptions} [opt] - Options for the metric computation
* @param {boolean} [symmetric=false] - Whether the metric is symmetric (same result for inputs in any order)
* @throws {Error} - If inputs `a` or `b` are empty
*/
constructor(metric: string, a: MetricInput, b: MetricInput, opt?: MetricOptions, symmetric?: boolean);
/**
* Pre-compute the metric for two strings.
* This method is called before the actual computation to handle trivial cases.
*
* A return value of `undefined` signals that no trivial case applied and the
* regular computation is still required.
*
* @param {string} a - First string
* @param {string} b - Second string
* @param {number} m - Length of the first string
* @param {number} n - Length of the second string
* @returns {MetricCompute<R>|undefined} - Pre-computed result or undefined if not applicable
*/
protected preCompute(a: string, b: string, m: number, n: number): MetricCompute<R> | undefined;
/**
* Computation hook to be overridden by subclasses.
* This method should contain the logic for computing the metric between two strings.
*
* NOTE: the signature is not marked `abstract`, so the compiler does not force
* subclasses to override it; per the `@throws` contract below, the base
* implementation throws when it is not overridden.
*
* @param {string} a - First string
* @param {string} b - Second string
* @param {number} m - Length of the first string
* @param {number} n - Length of the second string
* @param {number} maxLen - Maximum length of the strings
* @returns {MetricCompute<R>} - The result of the metric computation
* @throws {Error} - If not overridden in a subclass
*/
protected compute(a: string, b: string, m: number, n: number, maxLen: number): MetricCompute<R>;
/**
* Run the metric computation for single inputs (two strings).
* Applies preCompute for trivial cases before cache lookup and computation.
*
* If the profiler is active, it will measure time and memory usage.
*
* @param {number} i - Pointer to the first string
* @param {number} j - Pointer to the second string
* @returns {MetricResultSingle<R>} - The result of the metric computation
*/
private runSingle;
/**
* Run the metric computation for single inputs (two strings) asynchronously.
* Asynchronous counterpart of `runSingle`.
*
* @param {number} i - Pointer to the first string
* @param {number} j - Pointer to the second string
* @returns {Promise<MetricResultSingle<R>>} - Promise resolving the result of the metric computation
*/
private runSingleAsync;
/**
* Run the metric computation for batch inputs (arrays of strings).
*
* It iterates through each string in the first array and computes the metric
* against each string in the second array.
*/
private runBatch;
/**
* Run the metric computation for batch inputs (arrays of strings) asynchronously.
* Asynchronous counterpart of `runBatch`.
*/
private runBatchAsync;
/**
* Run the metric computation for pairwise inputs (A[i] vs B[i]).
*
* This method assumes that both `a` and `b` are arrays of equal length
* and computes the metric only for corresponding index pairs.
*/
private runPairwise;
/**
* Run the metric computation for pairwise inputs (A[i] vs B[i]) asynchronously.
* Asynchronous counterpart of `runPairwise`.
*/
private runPairwiseAsync;
/**
* Set the original inputs to which the results of the metric calculation will refer.
*
* Both arguments are optional.
*
* @param {MetricInput} [a] - original input(s) for a
* @param {MetricInput} [b] - original input(s) for b
* @returns {this} - The current instance, allowing calls to be chained
*/
setOriginal(a?: MetricInput, b?: MetricInput): this;
/**
* Check if the inputs are in batch mode.
*
* This method checks if either `a` or `b` contains more than one string,
* indicating that the metric is being run in batch mode.
*
* @returns {boolean} - True if either input is an array with more than one element
*/
isBatch(): boolean;
/**
* Check if the inputs are in single mode.
*
* This method checks if both `a` and `b` are single strings (not arrays),
* indicating that the metric is being run on a single pair of strings.
*
* @returns {boolean} - True if both inputs are single strings
*/
isSingle(): boolean;
/**
* Check if the inputs are in pairwise mode.
*
* This method checks if both `a` and `b` are arrays of the same length,
* indicating that the metric is being run on corresponding pairs of strings.
*
* @param {boolean} [safe=false] - If true, does not throw an error if lengths are not equal
* @returns {boolean} - True if both inputs are arrays of equal length
* @throws {Error} - If `safe` is false and the lengths of `a` and `b` are not equal
*/
isPairwise(safe?: boolean): boolean;
/**
* Check if the metric is symmetrical.
*
* This method returns whether the metric is symmetric, meaning it produces the same
* result regardless of the order of inputs (e.g., Levenshtein distance).
*
* @returns {boolean} - True if the metric is symmetric
*/
isSymmetrical(): boolean;
/**
* Determine which mode to run the metric in.
*
* This method checks the provided mode or defaults to the mode specified in options.
* If no mode is specified, it defaults to 'default'.
*
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
* @returns {MetricMode} - The determined mode
*/
whichMode(mode?: MetricMode): MetricMode;
/**
* Clear the results of this metric instance.
*
* This method resets the `results` property to `undefined`, effectively clearing
* any previously computed results. It can be useful for re-running the metric
* with new inputs or options.
*
* This is distinct from the static `Metric.clear()`, which clears the class-level
* cache of metric computations.
*/
clear(): void;
/**
* Run the metric computation based on the specified mode.
*
* Nothing is returned; read the outcome afterwards via `getResults()`.
*
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
* @param {boolean} [clear=true] - Whether to clear previous results before running
* @throws {Error} - If an unsupported mode is specified
*/
run(mode?: MetricMode, clear?: boolean): void;
/**
* Run the metric computation based on the specified mode asynchronously.
*
* The promise resolves without a value; read the outcome afterwards via `getResults()`.
*
* @param {MetricMode} [mode] - The mode to run the metric in (optional)
* @param {boolean} [clear=true] - Whether to clear previous results before running
* @returns {Promise<void>} - A promise that resolves when the metric computation is complete
* @throws {Error} - If an unsupported mode is specified
*/
runAsync(mode?: MetricMode, clear?: boolean): Promise<void>;
/**
* Get the name of the metric.
*
* @returns {string} - The name of the metric
*/
getMetricName(): string;
/**
* Get the result of the metric computation.
*
* @returns {MetricResult<R>} - The result of the metric computation
* @throws {Error} - If `run()` has not been called before this method
*/
getResults(): MetricResult<R>;
}
/**
* Metric registry service for managing metric implementations.
*
* This registry allows for dynamic registration and retrieval of metric classes,
* enabling the use of various string similarity metrics in a consistent manner.
*
* It is typed as `RegistryService<Metric<MetricRaw>>`, i.e. parameterized with the
* default raw result type. The registry's own methods are defined by `RegistryService`
* (imported from `../utils/Types`) and are not visible in this file.
*/
export declare const MetricRegistry: RegistryService<Metric<MetricRaw>>;
/**
* Type definition for a class constructor that extends the Metric class.
*
* This type represents a constructor function for a class that extends the Metric
* class. It can be used to create instances of specific metric implementations,
* such as Levenshtein or Jaro-Winkler.
*
* The argument list is declared as `any[]`, so constructor arguments are not
* type-checked when a class is instantiated through this type.
*
* @template R - The type of the raw result, defaulting to `MetricRaw`.
*/
export type MetricCls<R = MetricRaw> = new (...args: any[]) => Metric<R>;