UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

77 lines (76 loc) 3.38 kB
/**
 * Normalizer Utility
 * src/utils/Normalizer.ts
 *
 * @see https://en.wikipedia.org/wiki/Text_normalization
 * @see https://en.wikipedia.org/wiki/Unicode_equivalence
 *
 * Declares the Normalizer class, which normalizes strings according to a set
 * of single-character flags. Normalization is organised as a pipeline of
 * functions that can be reused and cached for efficiency. Both single strings
 * and arrays of strings are accepted, and a synchronous as well as an
 * asynchronous entry point is offered.
 *
 * Supported flags:
 *  'd' :: Normalize to NFD (Normalization Form Decomposed)
 *  'u' :: Normalize to NFC (Normalization Form Composed)
 *  'x' :: Normalize to NFKC (Normalization Form Compatibility Composed)
 *  'w' :: Collapse whitespace
 *  't' :: Remove leading and trailing whitespace
 *  'r' :: Remove double characters
 *  's' :: Remove punctuation / special characters
 *  'k' :: Remove non-letter characters
 *  'n' :: Remove non-number characters
 *  'i' :: Case insensitive (convert to lowercase)
 *
 * @module Utils/Normalizer
 * @author Paul Köhler (komed3)
 * @license MIT
 */
import type { NormalizeFlags } from './Types';

/**
 * Flag-driven string normalization.
 *
 * Every member is static, so the class is used directly
 * (e.g. `Normalizer.normalize( … )`) rather than through instances.
 */
export declare class Normalizer {

    /**
     * Map holding normalization functions keyed by their flags, so that the
     * normalization logic for a flag combination can be reused instead of
     * being recomputed.
     *
     * Private: the concrete type is not part of this declaration file.
     */
    private static pipeline;

    /**
     * Cache of already normalized strings, based on the flags and the input,
     * which avoids normalizing the same input with the same flags twice.
     *
     * Private: the concrete type is not part of this declaration file.
     */
    private static cache;

    /**
     * Returns the normalization function for the provided flags.
     *
     * Per the original documentation it takes the flags (a string of
     * characters, each naming one normalization step) and returns a function
     * that normalizes a single string accordingly.
     *
     * Private: the signature is not part of this declaration file.
     */
    private static getPipeline;

    /**
     * Normalizes a string, or every string of an array, according to the
     * provided flags.
     *
     * @param input - The string or array of strings to normalize
     * @param flags - A string of characters, each naming one normalization step
     * @returns The normalized string(s)
     */
    static normalize(input: string | string[], flags: NormalizeFlags): string | string[];

    /**
     * Asynchronous counterpart of {@link Normalizer.normalize}.
     *
     * Intended for large inputs, or for situations in which normalization
     * should be done in a non-blocking way.
     *
     * @param input - The string or array of strings to normalize
     * @param flags - A string of characters, each naming one normalization step
     * @returns A promise that resolves to the normalized string(s)
     */
    static normalizeAsync(input: string | string[], flags: NormalizeFlags): Promise<string | string[]>;

    /**
     * Clears the normalization pipeline and the cache, resetting the state of
     * the Normalizer.
     */
    static clear(): void;

}