cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity
473 lines (472 loc) • 19.1 kB
TypeScript
/**
* CmpStr Main API
* src/CmpStr.ts
*
* The CmpStr class provides a comprehensive, highly abstracted, and type-safe interface
* for string comparison, similarity measurement, phonetic indexing, filtering, normalization,
* and text analysis. It unifies all core features of the CmpStr package and exposes a
* consistent, user-friendly API for both single and batch operations.
*
* Features:
* - Centralized management of metrics, phonetic algorithms, and filters
* - Flexible normalization and filtering pipeline for all inputs
* - Batch, pairwise, and single string comparison with detailed results
* - Phonetic indexing and phonetic-aware search and comparison
* - Text analysis and unified diff utilities
* - Full TypeScript type safety and extensibility
*
* @module CmpStr
* @author Paul Köhler (komed3)
* @license MIT
*/
import type { CmpStrOptions, CmpStrProcessors, CmpStrResult, NormalizeFlags, DiffOptions, PhoneticOptions, MetricRaw, MetricInput, MetricMode, MetricResult, MetricResultSingle, MetricResultBatch } from './utils/Types';
import { TextAnalyzer } from './utils/TextAnalyzer';
import { DiffChecker } from './utils/DiffChecker';
import { Normalizer } from './utils/Normalizer';
import { Filter } from './utils/Filter';
import { Metric } from './metric';
import { Phonetic } from './phonetic';
/**
* The main CmpStr class that provides a unified interface for string comparison,
* phonetic indexing, filtering, and text analysis.
*
* @template R - The type of the metric result, defaults to MetricRaw
*/
export declare class CmpStr<R = MetricRaw> {
/**
* --------------------------------------------------------------------------------
* Static methods and properties for global access to CmpStr features
* --------------------------------------------------------------------------------
*
* These static methods provide a convenient way to access the core features of
* the CmpStr package without needing to instantiate a CmpStr object.
*/
/**
* Adds, removes, pauses, resumes, lists, or clears global filters.
*
* Each member is typed as the corresponding static method of `Filter`.
*
* @see Filter
*/
static readonly filter: {
add: typeof Filter.add;
remove: typeof Filter.remove;
pause: typeof Filter.pause;
resume: typeof Filter.resume;
list: typeof Filter.list;
clear: typeof Filter.clear;
};
/**
* Adds, removes, checks, or lists available metrics.
*
* @see MetricRegistry
*/
static readonly metric: {
add: (name: string, cls: import("./utils/Types").RegistryConstructor<Metric<MetricRaw>>, update?: boolean) => void;
remove: (name: string) => void;
has: (name: string) => boolean;
list: () => string[];
};
/**
* Adds, removes, checks, or lists available phonetic algorithms and mappings.
*
* The nested `map` member offers the same operations for the mappings of a
* single algorithm, addressed by algorithm name and mapping id.
*
* @see PhoneticRegistry
*/
static readonly phonetic: {
add: (name: string, cls: import("./utils/Types").RegistryConstructor<Phonetic>, update?: boolean) => void;
remove: (name: string) => void;
has: (name: string) => boolean;
list: () => string[];
map: {
add: (algo: string, id: string, map: import("./utils/Types").PhoneticMap, update?: boolean) => void;
remove: (algo: string, id: string) => void;
has: (algo: string, id: string) => boolean;
list: (algo: string) => string[];
};
};
/**
* Provides access to the global profiler services.
*
* @see Profiler
*/
static readonly profiler: import("./utils/Types").ProfilerService<any>;
/**
* Clears the caches for normalizer, metric, and phonetic modules.
*
* Each member is typed as the static `clear` method of the respective module.
*/
static readonly clearCache: {
normalizer: typeof Normalizer.clear;
metric: typeof Metric.clear;
phonetic: typeof Phonetic.clear;
};
/**
* Returns a TextAnalyzer instance for the given input string.
*
* @param {string} input - The input string
* @returns {TextAnalyzer} - The text analyzer
*/
static analyze(input: string): TextAnalyzer;
/**
* Returns a DiffChecker instance for computing the unified diff between two texts.
*
* @param {string} a - The first (original) text
* @param {string} b - The second (modified) text
* @param {DiffOptions} [opt] - Optional diff configuration
* @returns {DiffChecker} - The diff checker instance
*/
static diff(a: string, b: string, opt?: DiffOptions): DiffChecker;
/**
* --------------------------------------------------------------------------------
* Instantiate the CmpStr class
* --------------------------------------------------------------------------------
*
* Methods to create a new CmpStr instance with the given options.
* Using the static `create` method is recommended to ensure proper instantiation.
*/
/**
* Creates a new CmpStr instance with the given options.
*
* @template R - The type of the metric result, defaults to MetricRaw
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
* @returns {CmpStr<R>} - A new CmpStr instance
*/
static create<R = MetricRaw>(opt?: string | CmpStrOptions): CmpStr<R>;
/** The options held by this instance. */
protected options: CmpStrOptions;
/**
* Creates a new CmpStr instance with the given options.
* The constructor is protected to enforce the use of the static `create` method.
*
* @param {string|CmpStrOptions} [opt] - Optional serialized or options object
*/
protected constructor(opt?: string | CmpStrOptions);
/**
* ---------------------------------------------------------------------------------
* Protected utility methods for internal use
* ---------------------------------------------------------------------------------
*
* These methods provide utility functions for converting inputs, merging options,
* normalizing inputs, filtering, and preparing inputs for comparison.
*/
/**
* Asserts a condition and throws if the condition is not met.
*
* @param {string} cond - The condition to be met
* @param {any} [test] - Value to test for
* @throws {Error} If the condition is not met
*/
protected assert(cond: string, test?: any): void;
/**
* Asserts multiple conditions.
*
* @param {[ string, any? ][]} cond - Array of [ condition, value ] pairs
*/
protected assertMany(...cond: [string, any?][]): void;
/**
* Resolves the options for the CmpStr instance, merging the provided options with
* the existing options.
*
* @param {CmpStrOptions} [opt] - Optional options to merge
* @returns {CmpStrOptions} - The resolved options
*/
protected resolveOptions(opt?: CmpStrOptions): CmpStrOptions;
/**
* Normalizes the input string or array using the configured or provided flags.
*
* @param {MetricInput} input - The input string or array
* @param {NormalizeFlags} [flags] - Normalization flags
* @returns {MetricInput} - The normalized input
*/
protected normalize(input: MetricInput, flags?: NormalizeFlags): MetricInput;
/**
* Applies all active filters to the input string or array.
*
* NOTE(review): a default hook of 'input' was previously documented, but this
* declaration requires the argument — confirm against the implementation.
*
* @param {MetricInput} input - The input string or array
* @param {string} hook - The filter hook
* @returns {MetricInput} - The filtered string(s)
*/
protected filter(input: MetricInput, hook: string): MetricInput;
/**
* Prepares the input by normalizing and filtering.
*
* @param {MetricInput} input - The input string or array
* @param {CmpStrOptions} [opt] - Optional options to use
* @returns {MetricInput} - The prepared input
*/
protected prepare(input: MetricInput, opt?: CmpStrOptions): MetricInput;
/**
* Post-processes the results of the metric computation.
*
* @param {MetricResult<R>} result - The metric result
* @param {CmpStrOptions} [opt] - Optional options to use
* @returns {MetricResult<R>} - The post-processed results
*/
protected postProcess(result: MetricResult<R>, opt?: CmpStrOptions): MetricResult<R>;
/**
* Computes the phonetic index for the given input using the specified phonetic algorithm.
*
* @param {MetricInput} input - The input string or array
* @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
* @returns {MetricInput} - The phonetic index for the given input
*/
protected index(input: MetricInput, { algo, opt }: {
algo: string;
opt?: PhoneticOptions;
}): MetricInput;
/**
* Computes the metric result for the given inputs, applying normalization and
* filtering as configured.
*
* @template T - The type of the metric result
* @param {MetricInput} a - The first input string or array
* @param {MetricInput} b - The second input string or array
* @param {CmpStrOptions} [opt] - Optional options to use
* @param {MetricMode} [mode='single'] - The metric mode to use
* @param {boolean} [raw=false] - Whether to return raw results
* @param {boolean} [skip=false] - Whether to skip normalization and filtering
* @returns {T} - The computed metric result
*/
protected compute<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions, mode?: MetricMode, raw?: boolean, skip?: boolean): T;
/**
* Resolves the result format (raw or formatted).
*
* @template T - The type of the metric result
* @param {MetricResult<R>} result - The metric result
* @param {boolean} [raw] - Whether to return raw results
* @returns {T} - The resolved result
*/
protected output<T extends MetricResult<R> | CmpStrResult | CmpStrResult[]>(result: MetricResult<R>, raw?: boolean): T;
/**
* ---------------------------------------------------------------------------------
* Managing methods for CmpStr
* ---------------------------------------------------------------------------------
*
* These methods provide an interface to set and get properties of the CmpStr
* instance, such as options, metric, phonetic algorithm, and more.
*/
/**
* Creates a shallow clone of the current instance.
*
* @returns {CmpStr<R>} - The cloned instance
*/
clone(): CmpStr<R>;
/**
* Resets the instance, clearing all data and options.
*
* @returns {this} - The current instance
*/
reset(): this;
/**
* Sets / replaces the full options object.
*
* @param {CmpStrOptions} opt - The options
* @returns {this} - The current instance
*/
setOptions(opt: CmpStrOptions): this;
/**
* Deep merges and sets new options.
*
* @param {CmpStrOptions} opt - The options to merge
* @returns {this} - The current instance
*/
mergeOptions(opt: CmpStrOptions): this;
/**
* Sets the serialized options from a JSON string.
*
* @param {string} opt - The serialized options
* @returns {this} - The current instance
*/
setSerializedOptions(opt: string): this;
/**
* Sets a specific option at the given path.
*
* @param {string} path - The path to the option
* @param {any} value - The value to set
* @returns {this} - The current instance
*/
setOption(path: string, value: any): this;
/**
* Removes an option at the given path.
*
* @param {string} path - The path to the option
* @returns {this} - The current instance
*/
rmvOption(path: string): this;
/**
* Enables or disables raw output.
*
* @param {boolean} enable - Whether to enable or disable raw output
* @returns {this} - The current instance
*/
setRaw(enable: boolean): this;
/**
* Sets the similarity metric to use (e.g., 'levenshtein', 'dice').
*
* @param {string} name - The metric name
* @returns {this} - The current instance
*/
setMetric(name: string): this;
/**
* Sets the normalization flags (e.g., 'itw', 'nfc').
*
* @param {NormalizeFlags} flags - The normalization flags
* @returns {this} - The current instance
*/
setFlags(flags: NormalizeFlags): this;
/**
* Removes the normalization flags entirely.
*
* @returns {this} - The current instance
*/
rmvFlags(): this;
/**
* Sets the pre-processors to use for preparing the input.
*
* @param {CmpStrProcessors} opt - The processors to set
* @returns {this} - The current instance
*/
setProcessors(opt: CmpStrProcessors): this;
/**
* Removes the processors entirely.
*
* @returns {this} - The current instance
*/
rmvProcessors(): this;
/**
* Returns the current options object.
*
* @returns {CmpStrOptions} - The options
*/
getOptions(): CmpStrOptions;
/**
* Returns the options as a JSON string.
*
* @returns {string} - The serialized options
*/
getSerializedOptions(): string;
/**
* Returns a specific option value by path.
*
* @param {string} path - The path to the option
* @returns {any} - The option value
*/
getOption(path: string): any;
/**
* ---------------------------------------------------------------------------------
* Public core methods for string comparison
* ---------------------------------------------------------------------------------
*
* These methods provide the core functionality of the CmpStr class, allowing for
* string comparison, phonetic indexing, filtering, and text search.
*/
/**
* Performs a single metric comparison between the source and target.
*
* @template T - The type of the metric result
* @param {string} a - The source string
* @param {string} b - The target string
* @param {CmpStrOptions} [opt] - Optional options
* @returns {T} - The metric result
*/
test<T extends CmpStrResult | MetricResultSingle<R>>(a: string, b: string, opt?: CmpStrOptions): T;
/**
* Performs a single metric comparison and returns only the numeric score.
*
* @param {string} a - The source string
* @param {string} b - The target string
* @param {CmpStrOptions} [opt] - Optional options
* @returns {number} - The similarity score (0..1)
*/
compare(a: string, b: string, opt?: CmpStrOptions): number;
/**
* Performs a batch metric comparison between source and target strings
* or array of strings.
*
* @template T - The type of the metric result
* @param {MetricInput} a - The source string or array of strings
* @param {MetricInput} b - The target string or array of strings
* @param {CmpStrOptions} [opt] - Optional options
* @returns {T} - The batch metric results
*/
batchTest<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions): T;
/**
* Performs a batch metric comparison and returns results sorted by score.
*
* @template T - The type of the metric result
* @param {MetricInput} a - The source string or array of strings
* @param {MetricInput} b - The target string or array of strings
* @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
* @param {CmpStrOptions} [opt] - Optional options
* @returns {T} - The sorted batch results
*/
batchSorted<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, dir?: 'desc' | 'asc', opt?: CmpStrOptions): T;
/**
* Performs a pairwise metric comparison between source and target strings
* or array of strings.
*
* Input arrays need to be of the same length to perform a pairwise comparison,
* otherwise the method will throw an error.
*
* @template T - The type of the metric result
* @param {MetricInput} a - The source string or array of strings
* @param {MetricInput} b - The target string or array of strings
* @param {CmpStrOptions} [opt] - Optional options
* @returns {T} - The pairwise metric results
*/
pairs<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, opt?: CmpStrOptions): T;
/**
* Performs a batch comparison and returns only results above the threshold.
*
* @template T - The type of the metric result
* @param {MetricInput} a - The source string or array of strings
* @param {MetricInput} b - The target string or array of strings
* @param {number} threshold - The similarity threshold (0..1)
* @param {CmpStrOptions} [opt] - Optional options
* @returns {T} - The filtered batch results
*/
match<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, threshold: number, opt?: CmpStrOptions): T;
/**
* Returns the n closest matches from a batch comparison.
*
* @template T - The type of the metric result
* @param {MetricInput} a - The source string or array of strings
* @param {MetricInput} b - The target string or array of strings
* @param {number} [n=1] - Number of closest matches
* @param {CmpStrOptions} [opt] - Optional options
* @returns {T} - The closest matches
*/
closest<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, n?: number, opt?: CmpStrOptions): T;
/**
* Returns the n furthest matches from a batch comparison.
*
* @template T - The type of the metric result
* @param {MetricInput} a - The source string or array of strings
* @param {MetricInput} b - The target string or array of strings
* @param {number} [n=1] - Number of furthest matches
* @param {CmpStrOptions} [opt] - Optional options
* @returns {T} - The furthest matches
*/
furthest<T extends CmpStrResult[] | MetricResultBatch<R>>(a: MetricInput, b: MetricInput, n?: number, opt?: CmpStrOptions): T;
/**
* Performs a normalized and filtered substring search.
*
* @param {string} needle - The search string
* @param {string[]} haystack - The array to search in
* @param {NormalizeFlags} [flags] - Normalization flags
* @param {CmpStrProcessors} [processors] - Pre-processors to apply
* @returns {string[]} - Array of matching entries
*/
search(needle: string, haystack: string[], flags?: NormalizeFlags, processors?: CmpStrProcessors): string[];
/**
* Computes a similarity matrix for the given input array.
*
* @param {string[]} input - The input array
* @param {CmpStrOptions} [opt] - Optional options
* @returns {number[][]} - The similarity matrix
*/
matrix(input: string[], opt?: CmpStrOptions): number[][];
/**
* Computes the phonetic index for a string using the configured
* or given algorithm.
*
* @param {string} input - The input string
* @param {string} [algo] - The phonetic algorithm to use
* @param {PhoneticOptions} [opt] - Optional phonetic options
* @returns {string} - The phonetic index as a string
*/
phoneticIndex(input: string, algo?: string, opt?: PhoneticOptions): string;
}