cmpstr
Version:
CmpStr is a lightweight, fast and well-performing package for calculating string similarity
77 lines (76 loc) • 3.38 kB
TypeScript
/**
* Normalizer Utility
* src/utils/Normalizer.ts
*
* @see https://en.wikipedia.org/wiki/Text_normalization
* @see https://en.wikipedia.org/wiki/Unicode_equivalence
*
* This module provides a Normalizer class that allows for string normalization based
* on various flags. It uses a pipeline of normalization functions that can be reused
* and cached for efficiency. The Normalizer can handle both single strings and arrays
* of strings, and supports synchronous and asynchronous normalization.
*
* Supported flags:
* 'd' :: Normalize to NFD (Normalization Form Decomposed)
* 'u' :: Normalize to NFC (Normalization Form Composed)
* 'x' :: Normalize to NFKC (Normalization Form Compatibility Composed)
* 'w' :: Collapse whitespace
* 't' :: Remove leading and trailing whitespace
* 'r' :: Remove double characters
* 's' :: Remove punctuation / special characters
* 'k' :: Remove non-letter characters
* 'n' :: Remove non-number characters
* 'i' :: Case insensitive (convert to lowercase)
*
* @module Utils/Normalizer
* @author Paul Köhler (komed3)
* @license MIT
*/
import type { NormalizeFlags } from './Types';
/**
* The Normalizer class providing methods to normalize strings based on various flags.
*
* This is a type declaration only: it describes the shape of the class and contains
* no implementation. Every member declared here is static, so the public methods
* are called directly on the class (e.g. `Normalizer.normalize( ... )`).
*
* The available flag characters are listed in the module header comment of this file.
*/
export declare class Normalizer {
/**
* A map that holds normalization functions based on the flags.
* This allows for reusing normalization logic without recomputing it.
*
* NOTE: this member is private, so its concrete type is not part of this
* declaration; the description above is the author's and cannot be verified here.
*/
private static pipeline;
/**
* A cache to store normalized strings based on the flags and input.
* This helps avoid recomputing normalization for the same input and flags.
*
* NOTE: this member is private, so its concrete type is not part of this
* declaration; presumably it is keyed by flags and input — confirm against
* the implementation.
*/
private static cache;
/**
* Returns a normalization function based on the provided flags.
* The flags are a string of characters that define the normalization steps.
*
* NOTE: this member is private, so its call signature is not part of this
* declaration. The parameter and return documentation below is the author's
* description. `NormalizerFn` is not imported in this file; it is presumably
* declared alongside `NormalizeFlags` in './Types' — confirm.
*
* @param {NormalizeFlags} flags - A string of characters representing the normalization steps
* @returns {NormalizerFn} - A function that normalizes a string based on the provided flags
*/
private static getPipeline;
/**
* Normalizes the input string or array of strings based on the provided flags.
* The flags are a string of characters that define the normalization steps.
*
* The declared return type is the union `string | string[]`, so callers have to
* narrow the result themselves (e.g. with `typeof` or `Array.isArray`).
* Presumably a string input yields a string and an array input yields an array,
* but the signature does not encode that — confirm against the implementation.
*
* @param {string|string[]} input - The string or array of strings to normalize
* @param {NormalizeFlags} flags - A string of characters representing the normalization steps
* @returns {string|string[]} - The normalized string(s)
*/
static normalize(input: string | string[], flags: NormalizeFlags): string | string[];
/**
* Asynchronously normalizes the input string or array of strings based on the
* provided flags. This method is useful for handling large inputs or when
* normalization needs to be done in a non-blocking way.
*
* Takes the same parameters as `normalize` and returns a promise of the same
* union type; the resolved value must likewise be narrowed by the caller.
* How the work is scheduled is not visible from this declaration.
*
* @param {string|string[]} input - The string or array of strings to normalize
* @param {NormalizeFlags} flags - A string of characters representing the normalization steps
* @returns {Promise<string|string[]>} - A promise that resolves to the normalized string(s)
*/
static normalizeAsync(input: string | string[], flags: NormalizeFlags): Promise<string | string[]>;
/**
* Clears the normalization pipeline and cache.
* This is useful for resetting the state of the Normalizer.
*
* Takes no arguments and returns nothing.
*/
static clear(): void;
}