UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast and well performing package for calculating string similarity

473 lines (472 loc) 19.1 kB
/**
 * CmpStr Main API
 * src/CmpStr.ts
 *
 * The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
 * for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
 * and text analysis. It unifies all core features of the CmpStr package and exposes a
 * consistent, user-friendly API for both single and batch operations.
 *
 * Features:
 *  - Centralized management of metrics, phonetic algorithms, and filters
 *  - Flexible normalization and filtering pipeline for all inputs
 *  - Batch, pairwise, and single string comparison with detailed results
 *  - Phonetic indexing and phonetic-aware search and comparison
 *  - Text analysis and unified diff utilities
 *  - Full TypeScript type safety and extensibility
 *
 * @module CmpStr
 * @author Paul Köhler (komed3)
 * @license MIT
 */
import type { CmpStrOptions, CmpStrProcessors, CmpStrResult, NormalizeFlags, DiffOptions, PhoneticOptions, MetricRaw, MetricInput, MetricMode, MetricResult, MetricResultSingle, MetricResultBatch } from './utils/Types';
import { TextAnalyzer } from './utils/TextAnalyzer';
import { DiffChecker } from './utils/DiffChecker';
import { Normalizer } from './utils/Normalizer';
import { Filter } from './utils/Filter';
import { Metric } from './metric';
import { Phonetic } from './phonetic';
/**
 * The main CmpStr class that provides a unified interface for string comparison,
 * phonetic indexing, filtering, and text analysis.
 *
 * Instances are obtained through the static `create` method; the constructor
 * is declared `protected`.
 *
 * @template R - The type of the metric result, defaults to MetricRaw
 */
export declare class CmpStr<R = MetricRaw> {
    /**
     * --------------------------------------------------------------------------------
     * Static methods and properties for global access to CmpStr features
     * --------------------------------------------------------------------------------
     *
     * These static methods provide a convenient way to access the core features of
     * the CmpStr package without needing to instantiate a CmpStr object.
     */
    /**
     * Adds, removes, pauses, resumes, lists, or clears global filters.
     * Each member is typed as the corresponding static member of `Filter`.
     *
     * @see Filter
     */
    static readonly filter: {
        add: typeof Filter.add;
        remove: typeof Filter.remove;
        pause: typeof Filter.pause;
        resume: typeof Filter.resume;
        list: typeof Filter.list;
        clear: typeof Filter.clear;
    };
    /**
     * Adds, removes, checks, or lists available metrics.
     *
     * @see MetricRegistry
     */
    static readonly metric: {
        add: (name: string, cls: import("./utils/Types").RegistryConstructor<Metric<MetricRaw>>, update?: boolean) => void;
        remove: (name: string) => void;
        has: (name: string) => boolean;
        list: () => string[];
    };
    /**
     * Adds, removes, checks, or lists available phonetic algorithms and mappings.
     * Mappings are managed through the nested `map` object and are addressed by
     * algorithm name plus mapping id.
     *
     * @see PhoneticRegistry
     */
    static readonly phonetic: {
        add: (name: string, cls: import("./utils/Types").RegistryConstructor<Phonetic>, update?: boolean) => void;
        remove: (name: string) => void;
        has: (name: string) => boolean;
        list: () => string[];
        map: {
            add: (algo: string, id: string, map: import("./utils/Types").PhoneticMap, update?: boolean) => void;
            remove: (algo: string, id: string) => void;
            has: (algo: string, id: string) => boolean;
            list: (algo: string) => string[];
        };
    };
    /**
     * Provides access to the global profiler services.
     *
     * @see Profiler
     */
    static readonly profiler: import("./utils/Types").ProfilerService<any>;
    /**
     * Clears the caches for normalizer, metric, and phonetic modules.
     * Each member is typed as the `clear` static of the respective module.
     */
    static readonly clearCache: {
        normalizer: typeof Normalizer.clear;
        metric: typeof Metric.clear;
        phonetic: typeof Phonetic.clear;
    };
    /**
     * Returns a TextAnalyzer instance for the given input string.
     *
     * @param {string} input - The input string
     * @returns {TextAnalyzer} - The text analyzer
     */
    static analyze(input: string): TextAnalyzer;
    /**
     * Returns a DiffChecker instance for computing the unified diff between two texts.
     *
     * @param {string} a - The first (original) text
     * @param {string} b - The second (modified) text
     * @param {DiffOptions} [opt] - Optional diff configuration
     * @returns {DiffChecker} - The diff checker instance
     */
    static diff(a: string, b: string, opt?: DiffOptions): DiffChecker;
    /**
     * --------------------------------------------------------------------------------
     * Instantiate the CmpStr class
     * --------------------------------------------------------------------------------
     *
     * Methods to create a new CmpStr instance with the given options.
     * Using the static `create` method is recommended to ensure proper instantiation.
     */
    /**
     * Creates a new CmpStr instance with the given options.
     *
     * @template R - The type of the metric result, defaults to MetricRaw
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     * @returns {CmpStr<R>} - A new CmpStr instance
     */
    static create<R = MetricRaw>(opt?: string | CmpStrOptions): CmpStr<R>;
    /** The options object held by this instance. */
    protected options: CmpStrOptions;
    /**
     * Creates a new CmpStr instance with the given options.
     * The constructor is protected to enforce the use of the static `create` method.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     */
    protected constructor(opt?: string | CmpStrOptions);
    /**
     * ---------------------------------------------------------------------------------
     * Protected utility methods for internal use
     * ---------------------------------------------------------------------------------
     *
     * These methods provide utility functions for converting inputs, merging options,
     * normalizing inputs, filtering, and preparing inputs for comparison.
     */
    /**
     * Asserts a condition and throws if the condition is not met.
     *
     * @param {string} cond - The condition to meet
     * @param {any} [test] - Value to test for
     * @throws {Error} If the condition is not met
     */
    protected assert(cond: string, test?: any): void;
    /**
     * Asserts multiple conditions.
     *
     * @param {...[ string, any? ]} cond - One or more [ condition, value ] pairs,
     *   passed as individual arguments (rest parameter)
     */
    protected assertMany(...cond: [string, any?][]): void;
    /**
     * Resolves the options for the CmpStr instance, merging the provided options with
     * the existing options.
     *
     * @param {CmpStrOptions} [opt] - Optional options to merge
     * @returns {CmpStrOptions} - The resolved options
     */
    protected resolveOptions(opt?: CmpStrOptions): CmpStrOptions;
    /**
     * Normalizes the input string or array using the configured or provided flags.
     *
     * @param {MetricInput} input - The input string or array
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @returns {MetricInput} - The normalized input
     */
    protected normalize(input: MetricInput, flags?: NormalizeFlags): MetricInput;
    /**
     * Applies all active filters to the input string or array.
     *
     * @param {MetricInput} input - The input string or array
     * @param {string} hook - The filter hook (e.g. 'input'); the declared
     *   signature requires this argument
     * @returns {MetricInput} - The filtered string(s)
     */
    protected filter(input: MetricInput, hook: string): MetricInput;
    /**
     * Prepares the input by normalizing and filtering.
     *
     * @param {MetricInput} input - The input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use
     * @returns {MetricInput} - The prepared input
     */
    protected prepare(input: MetricInput, opt?: CmpStrOptions): MetricInput;
    /**
     * Post-processes the results of the metric computation.
     *
     * @param {MetricResult<R>} result - The metric result
     * @param {CmpStrOptions} [opt] - Optional options to use
     * @returns {MetricResult<R>} - The post-processed results
     */
    protected postProcess(result: MetricResult<R>, opt?: CmpStrOptions): MetricResult<R>;
    /**
     * Computes the phonetic index for the given input using the specified phonetic algorithm.
     *
     * @param {MetricInput} input - The input string or array
     * @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
     * @returns {MetricInput} - The phonetic index for the given input
     */
    protected index(input: MetricInput, { algo, opt }: { algo: string; opt?: PhoneticOptions; }): MetricInput;
    /**
     * Computes the metric result for the given inputs, applying normalization and
     * filtering as configured.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The first input string or array
     * @param {MetricInput} b - The second input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use
     * @param {MetricMode} [mode='single'] - The metric mode to use
     * @param {boolean} [raw=false] - Whether to return raw results
     * @param {boolean} [skip=false] - Whether to skip normalization and filtering
     * @returns {T} - The computed metric result
     */
    protected compute<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions, mode?: MetricMode, raw?: boolean, skip?: boolean): T;
    /**
     * Resolves the result format (raw or formatted).
     *
     * @template T - The type of the metric result
     * @param {MetricResult<R>} result - The metric result
     * @param {boolean} [raw] - Whether to return raw results
     * @returns {T} - The resolved result
     */
    protected output<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(result: MetricResult<R>, raw?: boolean): T;
    /**
     * ---------------------------------------------------------------------------------
     * Managing methods for CmpStr
     * ---------------------------------------------------------------------------------
     *
     * These methods provide an interface to set and get properties of the CmpStr
     * instance, such as options, metric, phonetic algorithm, and more.
     */
    /**
     * Creates a shallow clone of the current instance.
     *
     * @returns {CmpStr<R>} - The cloned instance
     */
    clone(): CmpStr<R>;
    /**
     * Resets the instance, clearing all data and options.
     *
     * @returns {this}
     */
    reset(): this;
    /**
     * Sets / replaces the full options object.
     *
     * @param {CmpStrOptions} opt - The options
     * @returns {this}
     */
    setOptions(opt: CmpStrOptions): this;
    /**
     * Deep merges and sets new options.
     *
     * @param {CmpStrOptions} opt - The options to merge
     * @returns {this}
     */
    mergeOptions(opt: CmpStrOptions): this;
    /**
     * Sets the serialized options from a JSON string.
     *
     * @param {string} opt - The serialized options
     * @returns {this}
     */
    setSerializedOptions(opt: string): this;
    /**
     * Sets a specific option at the given path.
     *
     * @param {string} path - The path to the option
     * @param {any} value - The value to set
     * @returns {this}
     */
    setOption(path: string, value: any): this;
    /**
     * Removes an option at the given path.
     *
     * @param {string} path - The path to the option
     * @returns {this}
     */
    rmvOption(path: string): this;
    /**
     * Enables or disables raw output.
     *
     * @param {boolean} enable - Whether to enable or disable raw output
     * @returns {this}
     */
    setRaw(enable: boolean): this;
    /**
     * Sets the similarity metric to use (e.g., 'levenshtein', 'dice').
     *
     * @param {string} name - The metric name
     * @returns {this}
     */
    setMetric(name: string): this;
    /**
     * Sets the normalization flags (e.g., 'itw', 'nfc').
     *
     * @param {NormalizeFlags} flags - The normalization flags
     * @returns {this}
     */
    setFlags(flags: NormalizeFlags): this;
    /**
     * Removes the normalization flags entirely.
     *
     * @returns {this}
     */
    rmvFlags(): this;
    /**
     * Sets the pre-processors to use for preparing the input.
     *
     * @param {CmpStrProcessors} opt - The processors to set
     * @returns {this}
     */
    setProcessors(opt: CmpStrProcessors): this;
    /**
     * Removes the processors entirely.
     *
     * @returns {this}
     */
    rmvProcessors(): this;
    /**
     * Returns the current options object.
     *
     * @returns {CmpStrOptions} - The options
     */
    getOptions(): CmpStrOptions;
    /**
     * Returns the options as a JSON string.
     *
     * @returns {string} - The serialized options
     */
    getSerializedOptions(): string;
    /**
     * Returns a specific option value by path.
     *
     * @param {string} path - The path to the option
     * @returns {any} - The option value
     */
    getOption(path: string): any;
    /**
     * ---------------------------------------------------------------------------------
     * Public core methods for string comparison
     * ---------------------------------------------------------------------------------
     *
     * These methods provide the core functionality of the CmpStr class, allowing for
     * string comparison, phonetic indexing, filtering, and text search.
     */
    /**
     * Performs a single metric comparison between the source and target.
     *
     * @template T - The type of the metric result
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The metric result
     */
    test<T extends CmpStrResult | MetricResultSingle<R>>(a: string, b: string, opt?: CmpStrOptions): T;
    /**
     * Performs a single metric comparison and returns only the numeric score.
     *
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {number} - The similarity score (0..1)
     */
    compare(a: string, b: string, opt?: CmpStrOptions): number;
    /**
     * Performs a batch metric comparison between source and target strings
     * or array of strings.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The batch metric results
     */
    batchTest<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions): T;
    /**
     * Performs a batch metric comparison and returns results sorted by score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The sorted batch results
     */
    batchSorted<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, dir?: 'desc' | 'asc', opt?: CmpStrOptions): T;
    /**
     * Performs a pairwise metric comparison between source and target strings
     * or array of strings.
     *
     * Input arrays need to be of the same length to perform a pairwise comparison,
     * otherwise the method will throw an error.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The pairwise metric results
     * @throws If the input arrays differ in length
     */
    pairs<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions): T;
    /**
     * Performs a batch comparison and returns only results above the threshold.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} threshold - The similarity threshold (0..1)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The filtered batch results
     */
    match<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, threshold: number, opt?: CmpStrOptions): T;
    /**
     * Returns the n closest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of closest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The closest matches
     */
    closest<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, n?: number, opt?: CmpStrOptions): T;
    /**
     * Returns the n furthest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of furthest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {T} - The furthest matches
     */
    furthest<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, n?: number, opt?: CmpStrOptions): T;
    /**
     * Performs a normalized and filtered substring search.
     *
     * @param {string} needle - The search string
     * @param {string[]} haystack - The array to search in
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @param {CmpStrProcessors} [processors] - Pre-processors to apply
     * @returns {string[]} - Array of matching entries
     */
    search(needle: string, haystack: string[], flags?: NormalizeFlags, processors?: CmpStrProcessors): string[];
    /**
     * Computes a similarity matrix for the given input array.
     *
     * @param {string[]} input - The input array
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {number[][]} - The similarity matrix
     */
    matrix(input: string[], opt?: CmpStrOptions): number[][];
    /**
     * Computes the phonetic index for a string using the configured
     * or given algorithm.
     *
     * @param {string} input - The input string
     * @param {string} [algo] - The phonetic algorithm to use
     * @param {PhoneticOptions} [opt] - Optional phonetic options
     * @returns {string} - The phonetic index as a string
     */
    phoneticIndex(input: string, algo?: string, opt?: PhoneticOptions): string;
}