// cmpstr
// Version:
// CmpStr is a lightweight, fast and well-performing package for calculating string similarity.
// 482 lines (481 loc) • 14 kB
// TypeScript
/**
* Types and Interfaces for CmpStr
* src/utils/Types.ts
*
* This file defines all core types, interfaces, and utility type aliases used throughout
* the CmpStr package. It provides type safety and documentation for all major components,
* including metrics, phonetic algorithms, filters, normalization, diffing, and profiling.
*
* All interfaces are designed for extensibility and clarity, supporting both internal
* implementation and external usage in user code.
*
* @module Utils
* @name Types
* @author Paul Köhler (komed3)
* @license MIT
*/
/**
* ================================================================================
* ERROR HANDLING
* ================================================================================
*
* Types for standardized error classes and structured error metadata.
*/
/**
 * Optional metadata carried by a CmpStr error.
 *
 * Either a free-form record keyed by string, or `undefined` when the error
 * has no metadata attached.
 */
export type CmpStrErrorMeta =
    | Record<string, any>
    | undefined;
/**
 * Shape of a CmpStr error after it has been serialized to JSON.
 */
export interface CmpStrErrorJSON {
    /** Name of the error. */
    name: string;
    /** Error code. */
    code: string;
    /** Error message. */
    message: string;
    /** Optional metadata attached to the error. */
    meta?: CmpStrErrorMeta;
    /** String telling when the error occurred; the exact format is not defined in this file. */
    when: string;
    /**
     * Optional underlying cause.
     *
     * NOTE: because the union includes `unknown`, the compiler treats this
     * property as `unknown`. The object shape only documents the expected
     * form, so consumers must narrow before reading `name`/`message`/`stack`.
     */
    cause?:
        | { name: string; message: string; stack?: string }
        | unknown;
}
/**
* ================================================================================
* PROFILER & POOL UTILITIES
* ================================================================================
*
* Types for performance profiling and buffer pool management.
*/
/**
 * A single profiling record: how long the profiled function ran, how much
 * memory it used, what it returned, and any optional metadata.
 *
 * @template T - The type of the profiled result
 */
export interface ProfilerEntry<T> {
    /** Execution time (unit not specified in this file). */
    time: number;
    /** Memory usage (unit not specified in this file). */
    mem: number;
    /** Result returned by the profiled function. */
    res: T;
    /** Optional free-form metadata. */
    meta?: Record<string, any>;
}
/**
 * Public API of the Profiler utility: switch profiling on or off, clear
 * collected results, read the collected entries, and read accumulated totals.
 *
 * @template T - The type of the profiled result
 */
export interface ProfilerService<T> {
    /** Enables profiling. */
    enable: () => void;
    /** Disables profiling. */
    disable: () => void;
    /** Clears the collected results. */
    clear: () => void;
    /** Returns the collected profiling entries. */
    report: () => Array<ProfilerEntry<T>>;
    /** Returns one entry or `undefined` — presumably the most recent one; confirm against the Profiler implementation. */
    last: () => ProfilerEntry<T> | undefined;
    /** Returns the totals for time and memory. */
    total: () => { time: number; mem: number };
}
/**
 * Names of the buffer kinds supported by the Pool utility.
 */
export type PoolType =
    | 'int32'
    | 'number[]'
    | 'string[]'
    | 'set'
    | 'map';
/**
 * Configuration of a single buffer pool.
 *
 * NOTE(review): how the limits below are enforced is decided by the Pool
 * implementation, which is not part of this file.
 */
export interface PoolConfig {
    /** Kind of buffer the pool manages. */
    type: PoolType;
    /** Pool size limit — presumably the maximum number of pooled buffers; confirm. */
    maxSize: number;
    /** Per-item size limit — presumably the largest buffer kept in the pool; confirm. */
    maxItemSize: number;
    /** Whether oversize requests are allowed — exact semantics to be confirmed against the Pool. */
    allowOversize: boolean;
}
/**
 * Pairs a pooled buffer with its size.
 *
 * @template T - The buffer type
 */
export interface PoolBuffer<T> {
    /** The buffer itself. */
    buffer: T;
    /** Size associated with the buffer. */
    size: number;
}
/**
* ================================================================================
* NORMALIZATION & FILTERING
* ================================================================================
*
* Types for input normalization and custom filtering pipelines.
*/
/**
 * NormalizerFn is the call signature of a normalization function:
 * it receives a string and returns the normalized string.
 *
 * @param input - The string to normalize
 * @returns The normalized string
 */
export type NormalizerFn = (input: string) => string;
/**
 * NormalizeFlags is a string describing a sequence of normalization steps;
 * each character or substring selects one normalization operation.
 *
 * The alias is a plain `string`, so the type system does not validate flag
 * values. The set of recognized flags is not defined in this file.
 */
export type NormalizeFlags = string;
/**
 * Names of filter hooks.
 *
 * `'input'` is the only predefined hook. The `string & {}` member accepts any
 * other string as a custom hook name while keeping the literal from being
 * absorbed into plain `string`.
 */
export type FilterHooks =
    | 'input'
    | (string & {});
/**
 * FilterFn is the call signature of a filter function:
 * it receives a string and returns the filtered string.
 *
 * @param input - The string to filter
 * @returns The filtered string
 */
export type FilterFn = (input: string) => string;
/**
 * Optional settings supplied for a filter entry. Every field may be omitted;
 * the defaults applied in that case are not defined in this file.
 */
export interface FilterOptions {
    /** Priority of the filter — ordering direction to be confirmed against the filter implementation. */
    priority?: number;
    /** Whether the filter is active. */
    active?: boolean;
    /** Whether the filter may be overridden — exact semantics to be confirmed. */
    overrideable?: boolean;
}
/**
 * A single filter as held by the filter system. It has the same settings as
 * FilterOptions, but all of them are required here.
 */
export interface FilterEntry {
    /** Identifier of the filter. */
    id: string;
    /** The filter function to run. */
    fn: FilterFn;
    /** Priority of the filter — ordering direction to be confirmed against the filter implementation. */
    priority: number;
    /** Whether the filter is active. */
    active: boolean;
    /** Whether the filter may be overridden — exact semantics to be confirmed. */
    overrideable: boolean;
}
/**
* ================================================================================
* REGISTRIES
* ================================================================================
*
* Types for managing registries of metrics, phonetic algorithms, and more.
*/
/**
 * RegistryConstructor is the constructor type of classes stored in a registry.
 *
 * The `abstract new` form accepts both abstract and concrete classes, and the
 * `any[]` rest parameter places no constraint on the constructor arguments.
 *
 * @template T - The instance type produced by the constructor
 */
export type RegistryConstructor<T> = abstract new (...args: any[]) => T;
/**
 * API of a generic, name-keyed registry of classes.
 *
 * @template T - The class type managed by the registry
 */
export interface RegistryService<T> {
    /** Registers `cls` under `name`; `update` presumably permits replacing an existing entry — confirm. */
    add: (name: string, cls: RegistryConstructor<T>, update?: boolean) => void;
    /** Removes the entry registered under `name`. */
    remove: (name: string) => void;
    /** Reports whether an entry is registered under `name`. */
    has: (name: string) => boolean;
    /**
     * Returns the constructor registered under `name`. The return type has no
     * `undefined`, so the behavior for unknown names is up to the implementation.
     */
    get: (name: string) => RegistryConstructor<T>;
    /** Lists the registered names. */
    list: () => Array<string>;
}
/**
* ================================================================================
* METRICS
* ================================================================================
*
* Types for string similarity metrics and comparison operations.
*/
/**
 * Input accepted by metric computations: one string or an array of strings.
 */
export type MetricInput =
    | string
    | Array<string>;
/**
 * Computation mode of a metric.
 * - 'default': single or batch comparison, depending on the input
 * - 'batch': compare multiple strings
 * - 'single': run a single comparison
 * - 'pairwise': compare arrays element-wise
 */
export type MetricMode =
    | 'default'
    | 'batch'
    | 'single'
    | 'pairwise';
/**
 * Options accepted by metric computations. All fields are optional.
 *
 * NOTE(review): apart from `mode`, the fields are metric-specific. Which
 * metric reads which field, and the defaults, are not visible in this file.
 */
export interface MetricOptions {
    /** Computation mode. */
    mode?: MetricMode;
    /** Delimiter string — presumably used to split input into tokens; confirm. */
    delimiter?: string;
    /** Padding string — presumably used when building q-grams; confirm. */
    pad?: string;
    /** Numeric `q` parameter — presumably the q-gram length; confirm. */
    q?: number;
    /** Numeric match value — presumably an alignment score; confirm. */
    match?: number;
    /** Numeric mismatch value — presumably an alignment score; confirm. */
    mismatch?: number;
    /** Numeric gap value — presumably an alignment score; confirm. */
    gap?: number;
}
/**
 * MetricRaw is a generic record for storing raw metric-specific data.
 *
 * Values are typed `any`, so this alias performs no checking of the stored
 * data beyond requiring string keys.
 */
export type MetricRaw = Record<string, any>;
/**
 * Outcome of one metric computation: the numeric result plus optional raw data.
 *
 * @template R - The type of the raw result
 */
export interface MetricCompute<R = MetricRaw> {
    /** Numeric result of the computation. */
    res: number;
    /** Optional raw, metric-specific data. */
    raw?: R;
}
/**
 * Result of comparing one pair of strings with a metric.
 *
 * @template R - The type of the raw result
 */
export interface MetricResultSingle<R = MetricRaw> {
    /** Name of the metric that produced the result. */
    metric: string;
    /** First compared string. */
    a: string;
    /** Second compared string. */
    b: string;
    /** Numeric result of the comparison. */
    res: number;
    /** Optional raw, metric-specific data. */
    raw?: R;
}
/**
 * Result of a batch operation: an array of single metric results.
 *
 * @template R - The type of the raw result
 */
export type MetricResultBatch<R = MetricRaw> = Array<MetricResultSingle<R>>;
/**
 * Any metric result: either a single result or a batch of results.
 *
 * @template R - The type of the raw result
 */
export type MetricResult<R = MetricRaw> =
    | MetricResultSingle<R>
    | MetricResultBatch<R>;
/**
 * A MetricResultSingle that may additionally carry an index in `__idx`.
 * What the index refers to is decided by the code that sets it.
 *
 * @template R - The type of the raw result
 */
export type IndexedResult<R = MetricRaw> =
    & MetricResultSingle<R>
    & { __idx?: number };
/**
* ================================================================================
* CMPSTR RESULT TYPES
* ================================================================================
*
* Types for CmpStr results and their various forms.
*/
/**
 * Simplified comparison result returned by user-facing API methods.
 */
export interface CmpStrResult {
    /** The source string of the comparison. */
    source: string;
    /** The target string of the comparison. */
    target: string;
    /** Numeric match value (range not specified in this file). */
    match: number;
}
/**
 * What a single comparison may return: the simplified CmpStrResult or the
 * full MetricResultSingle.
 *
 * @template R - The type of the raw result
 */
export type ResultLike<R = MetricRaw> =
    | CmpStrResult
    | MetricResultSingle<R>;
/**
 * What a batch comparison may return: an array of simplified results or a
 * full MetricResultBatch.
 *
 * @template R - The type of the raw result
 */
export type BatchResultLike<R = MetricRaw> =
    | Array<CmpStrResult>
    | MetricResultBatch<R>;
/**
 * CmpFnResult represents the possible return types of comparison functions:
 * an array of full metric results, an array of simplified results (each
 * optionally carrying raw data), or `null` / `undefined`.
 *
 * @template R - The type of the raw result; defaults to MetricRaw, matching
 *               ResultLike and BatchResultLike
 */
export type CmpFnResult<R = MetricRaw> =
    | MetricResultSingle<R>[]
    | (CmpStrResult & { raw?: R })[]
    | null
    | undefined;
/**
* ================================================================================
* PHONETIC ALGORITHMS
* ================================================================================
*
* Types for phonetic indexing, mapping, and phonetic-aware comparisons.
*/
/**
 * Options accepted by phonetic algorithms. All fields are optional.
 *
 * NOTE(review): defaults and exact semantics are defined by the individual
 * phonetic implementations, which are not part of this file.
 */
export interface PhoneticOptions {
    /** Identifier of the phonetic map to use — presumably a key of a PhoneticMapping; confirm. */
    map?: string;
    /** Delimiter string — presumably used to split input into words; confirm. */
    delimiter?: string;
    /** Length setting — presumably the length of the generated code; confirm. */
    length?: number;
    /** Padding string — presumably used to fill codes up to `length`; confirm. */
    pad?: string;
    /** Dedupe switch — what gets de-duplicated is to be confirmed. */
    dedupe?: boolean;
    /** Fallback string; may be omitted or explicitly `undefined`. */
    fallback?: string | undefined;
}
/**
 * One rule of a phonetic mapping: a character, the code it maps to, and
 * optional context constraints.
 *
 * NOTE(review): how the context fields are evaluated is defined by the
 * phonetic implementation, not by this file. The field comments below are
 * inferred from the names and should be confirmed.
 */
export interface PhoneticRule {
    /** Character the rule applies to. */
    char: string;
    /** Code emitted by the rule. */
    code: string;
    /** Optional position constraint. */
    position?:
        | 'start'
        | 'middle'
        | 'end';
    /** Presumably allowed values for the previous character. */
    prev?: string[];
    /** Presumably disallowed values for the previous character. */
    prevNot?: string[];
    /** Presumably allowed values for the character two positions back. */
    prev2?: string[];
    /** Presumably disallowed values for the character two positions back. */
    prev2Not?: string[];
    /** Presumably allowed values for the next character. */
    next?: string[];
    /** Presumably disallowed values for the next character. */
    nextNot?: string[];
    /** Presumably allowed values for the character two positions ahead. */
    next2?: string[];
    /** Presumably disallowed values for the character two positions ahead. */
    next2Not?: string[];
    /** Presumably a required leading string. */
    leading?: string;
    /** Presumably a required trailing string. */
    trailing?: string;
    /** Optional list of strings to match — semantics to be confirmed. */
    match?: string[];
}
/**
 * One pattern of a phonetic mapping: a regular expression and its replacement.
 */
export interface PhoneticPattern {
    /** Regular expression to look for. */
    pattern: RegExp;
    /** Replacement string. */
    replace: string;
    /** Optional flag — presumably "replace all occurrences"; confirm against the phonetic implementation. */
    all?: boolean;
}
/**
 * A mapping for one phonetic algorithm and language.
 */
export interface PhoneticMap {
    /** String-to-string lookup table — presumably character to code; confirm. */
    map: Record<string, string>;
    /** Optional regular-expression patterns. */
    patterns?: Array<PhoneticPattern>;
    /** Optional list of mapping rules. */
    ruleset?: Array<PhoneticRule>;
    /** Optional list of strings to ignore — presumably characters; confirm. */
    ignore?: Array<string>;
    /** Optional phonetic options shipped with this map. */
    options?: PhoneticOptions;
}
/**
 * PhoneticMapping is a record of named phonetic maps for an algorithm:
 * each key is the name of a map and each value is the PhoneticMap itself.
 */
export type PhoneticMapping = Record<string, PhoneticMap>;
/**
 * API for managing phonetic mappings, addressed by algorithm name and map id.
 */
export interface PhoneticMappingService {
    /** Stores `map` under `algo`/`id`; `update` presumably permits replacing an existing map — confirm. */
    add: (algo: string, id: string, map: PhoneticMap, update?: boolean) => void;
    /** Removes the map stored under `algo`/`id`. */
    remove: (algo: string, id: string) => void;
    /** Reports whether a map is stored under `algo`/`id`. */
    has: (algo: string, id: string) => boolean;
    /** Returns the map stored under `algo`/`id`, or `undefined`. */
    get: (algo: string, id: string) => PhoneticMap | undefined;
    /** Lists the map ids available for `algo`. */
    list: (algo: string) => Array<string>;
}
/**
* ================================================================================
* DIFF & TEXT ANALYSIS
* ================================================================================
*
* Types for unified diff computation and text comparison.
*/
/**
 * Granularity used when diffing.
 * - 'line': line-based diff
 * - 'word': word-based diff
 */
export type DiffMode =
    | 'line'
    | 'word';
/**
 * Options accepted by the DiffChecker utility. All fields are optional.
 *
 * NOTE(review): defaults and exact effects are defined by DiffChecker, which
 * is not part of this file. Hedged comments below are inferred from names.
 */
export interface DiffOptions {
    /** Diff granularity. */
    mode?: DiffMode;
    /** Whether comparison ignores letter case. */
    caseInsensitive?: boolean;
    /** Number of context lines — presumably unchanged lines shown around a change; confirm. */
    contextLines?: number;
    /** Presumably whether adjacent changed lines are grouped; confirm. */
    groupedLines?: boolean;
    /** Presumably whether lines are expanded in the output; confirm. */
    expandLines?: boolean;
    /** Whether the change magnitude is shown. */
    showChangeMagnitude?: boolean;
    /** Upper bound on the number of magnitude symbols. */
    maxMagnitudeSymbols?: number;
    /** Line break string. */
    lineBreak?: string;
}
/**
 * A single change within a diff, holding both the deleted and the inserted text.
 */
export interface DiffEntry {
    /** Position in the first input (unit depends on the diff mode; confirm). */
    posA: number;
    /** Position in the second input (unit depends on the diff mode; confirm). */
    posB: number;
    /** Deleted text. */
    del: string;
    /** Inserted text. */
    ins: string;
    /** Size of the change — how it is computed is not defined in this file. */
    size: number;
}
/**
 * The diff of one line, including every change found on it.
 */
export interface DiffLine {
    /** Line number (indexing base not specified in this file). */
    line: number;
    /** All changes on this line. */
    diffs: Array<DiffEntry>;
    /** Deletion size — presumably summed over `diffs`; confirm. */
    delSize: number;
    /** Insertion size — presumably summed over `diffs`; confirm. */
    insSize: number;
    /** Total change size. */
    totalSize: number;
    /** Base length — presumably the length of the reference line; confirm. */
    baseLen: number;
    /** Magnitude string — presumably a symbol run describing the change size; confirm. */
    magnitude: string;
}
/**
 * A group of adjacent changed lines within a diff.
 */
export interface DiffGroup {
    /** Line number associated with the group (indexing base not specified in this file). */
    line: number;
    /** Start of the group's range. */
    start: number;
    /** End of the group's range (inclusive/exclusive not specified in this file). */
    end: number;
    /** Line diffs contained in the group. */
    entries: Array<DiffLine>;
    /** Deletion size — presumably summed over `entries`; confirm. */
    delSize: number;
    /** Insertion size — presumably summed over `entries`; confirm. */
    insSize: number;
    /** Total change size. */
    totalSize: number;
    /** Magnitude string — presumably a symbol run describing the change size; confirm. */
    magnitude: string;
}
/**
* ================================================================================
* CMPSTR CONFIGURATION
* ================================================================================
*
* Types for configuring CmpStr behavior and options.
*/
/**
 * Pre-processors applied to input strings before comparison.
 * Only a phonetic pre-processor is declared here.
 */
export interface CmpStrProcessors {
    /** Optional phonetic pre-processor: the algorithm name plus its options. */
    phonetic?: { algo: string; opt?: PhoneticOptions };
}
/**
 * Options for a CmpStr instance. All fields are optional.
 *
 * NOTE(review): defaults and exact effects are defined by the CmpStr class,
 * which is not part of this file. Hedged comments below are inferred from names.
 */
export interface CmpStrOptions {
    /** Presumably whether raw metric results are returned instead of simplified ones; confirm. */
    raw?: boolean;
    /** Presumably whether zero-valued results are removed; confirm. */
    removeZero?: boolean;
    /** Normalization flags. */
    flags?: NormalizeFlags;
    /** Name of the metric to use. */
    metric?: string;
    /** Options passed to the metric. */
    opt?: MetricOptions;
    /** Pre-processors for the input strings. */
    processors?: CmpStrProcessors;
    /** Output selector — presumably original ('orig') versus pre-processed ('prep') strings; confirm. */
    output?:
        | 'orig'
        | 'prep';
    /** Presumably enables safe handling of empty input; confirm. */
    safeEmpty?: boolean;
}
/**
* ================================================================================
* STRUCTURED DATA
* ================================================================================
*
* Types for comparing structured data objects by extracting properties.
*/
/**
 * A lookup result with the original object attached.
 *
 * @template T - The type of the original object
 * @template R - The type of the metric raw result
 */
export interface StructuredDataResult<T = any, R = MetricRaw> {
    /** The original object. */
    obj: T;
    /** A property key of `T` — presumably the property that was compared; confirm. */
    key: keyof T;
    /** The simplified comparison result. */
    result: CmpStrResult;
    /** Optional raw, metric-specific data. */
    raw?: R;
}
/**
 * A list of structured-data lookup results.
 *
 * @template T - The type of the original object
 * @template R - The type of the metric raw result
 */
export type StructuredDataBatchResult<T = any, R = MetricRaw> = Array<StructuredDataResult<T, R>>;
/**
 * What a structured-data lookup may return: either the full batch of lookup
 * results or just an array of the original objects.
 *
 * @template T - The type of the original object
 * @template R - The type of the metric raw result
 */
export type StructuredResultLike<T = any, R = MetricRaw> =
    | StructuredDataBatchResult<T, R>
    | Array<T>;
/**
 * Options for structured-data lookups. Inherits every CmpStrOptions field
 * except `raw`, and adds the lookup-specific settings below.
 */
export interface StructuredDataOptions extends Omit<CmpStrOptions, 'raw'> {
    /** Sorting switch or direction; what `true` means relative to 'asc'/'desc' is to be confirmed. */
    sort?:
        | boolean
        | 'asc'
        | 'desc';
    /** Presumably makes the lookup return only the original objects; confirm. */
    objectsOnly?: boolean;
}