cmpstr
Version:
CmpStr is a lightweight, fast and well-performing package for calculating string similarity
190 lines (189 loc) • 8.07 kB
TypeScript
/**
* Abstract Phonetic
* src/phonetic/Phonetic.ts
*
* @see https://en.wikipedia.org/wiki/Phonetic_algorithm
*
* A phonetic algorithm refers to a method for indexing words according to their
* pronunciation. When the algorithm relies on orthography, it is significantly
* influenced by the spelling conventions of the language for which it is intended:
* since the majority of phonetic algorithms were created for English, they tend
* to be less effective for indexing words in other languages.
*
* Phonetic search has numerous applications, and one of the initial use cases has
* been in trademark searches to verify that newly registered trademarks do not
* pose a risk of infringing upon existing trademarks due to their pronunciation.
*
* This module provides an abstract class for generating phonetic indices based
* on mappings and rules. It allows for the implementation of various phonetic
* algorithms by extending the abstract class.
*
* @module Phonetic
* @author Paul Köhler (komed3)
* @license MIT
*/
import type { PhoneticMap, PhoneticOptions, RegistryService, PhoneticMappingService } from '../utils/Types';
/**
* Abstract class representing a phonetic algorithm.
*
* This is a declaration only: no method bodies are visible here, so the
* behavioural notes on the members below restate the documented contract
* rather than describe a verified implementation.
*
* Public surface: the static `clear` method plus the instance methods
* `getAlgoName`, `getIndex` and `getIndexAsync`.
*
* Extension points: the protected methods `applyRules`, `encode`, `mapChar`,
* `equalLen`, `word2Chars`, `exitEarly`, `adjustCode`, `loop` and `loopAsync`
* can be overridden in subclasses to implement specific phonetic algorithms.
*
* @abstract
*/
export declare abstract class Phonetic {
/**
* Class-level cache (its type is elided in this declaration).
*
* NOTE(review): presumably holds previously indexed words, since `clear()`
* is documented as clearing "the cache of indexed words" - confirm against
* the implementation.
*/
private static cache;
/**
* Default phonetic options.
*
* This object contains default settings for phonetic algorithms,
* implemented in the subclass.
*/
protected static default: PhoneticOptions;
/**
* Algorithm identifier (its type is elided in this declaration).
*
* NOTE(review): presumably the `algo` name passed to the constructor and
* returned by `getAlgoName()` - confirm against the implementation.
*/
private readonly algo;
/**
* Options of this instance; read-only after construction.
*
* NOTE(review): presumably derived from the constructor's `opt` argument and
* the static `default` options - the merge order is not visible here.
*/
protected readonly options: PhoneticOptions;
/**
* Phonetic mapping used by this instance; read-only after construction.
*
* NOTE(review): the constructor is documented to throw when the requested
* mapping is not declared, so this is presumably resolved at construction.
*/
protected readonly map: PhoneticMap;
/**
* Static method to clear the cache of indexed words.
*/
static clear(): void;
/**
* Constructor for the Phonetic class.
*
* Initializes the phonetic algorithm with the specified options and mapping.
*
* @param {string} algo - The name of the algorithm (e.g. 'soundex')
* @param {PhoneticOptions} [opt] - Options for the phonetic algorithm; may be omitted
* @throws {Error} - If the requested mapping is not declared
*/
constructor(algo: string, opt?: PhoneticOptions);
/**
* Applies phonetic rules to a character in a word context.
*
* This method is designed to be generic and efficient for all phonetic algorithms.
* It checks all rule types (prev, next, prevNot, nextNot, position, etc.) and
* returns either the appropriate code (string) or undefined.
*
* @param {string} char - The current character
* @param {number} i - The current position within the word
* @param {string[]} chars - The word as an array of characters
* @param {number} charLen - The total length of the word
* @returns {string|undefined} - The rule code or undefined if no rule applies
*/
protected applyRules(char: string, i: number, chars: string[], charLen: number): string | undefined;
/**
* Generates the phonetic code for a given word.
*
* This method processes the word character by character, applying phonetic rules
* and mappings to generate a phonetic code.
*
* @param {string} word - The input word to be converted into a phonetic code
* @returns {string} - The generated phonetic code
*/
protected encode(word: string): string;
/**
* Converts a character to its phonetic code based on the mapping and rules.
*
* The first four parameters mirror those of `applyRules`.
*
* @param {string} char - The current character
* @param {number} i - The current position within the word
* @param {string[]} chars - The word as an array of characters
* @param {number} charLen - The total length of the word
* @param {string|null} lastCode - The last code generated (to avoid duplicates), or null
* @param {Record<string, string>} map - The phonetic mapping
* @returns {string|undefined} - The phonetic code or undefined if no code applies
*/
protected mapChar(char: string, i: number, chars: string[], charLen: number, lastCode: string | null, map: Record<string, string>): string | undefined;
/**
* Ensures the phonetic code has a fixed length by padding or truncating.
*
* NOTE(review): the target length and padding character are not visible in
* this declaration; presumably taken from `options` - confirm.
*
* @param {string} input - The input string to be adjusted
* @returns {string} - The adjusted string with fixed length
*/
protected equalLen(input: string): string;
/**
* Converts a word into an array of characters.
*
* @param {string} word - The input word to be converted
* @returns {string[]} - An array of characters from the input word
*/
protected word2Chars(word: string): string[];
/**
* Determines whether to exit early based on the current phonetic code length.
*
* NOTE(review): where the length limit comes from is not visible in this
* declaration; presumably `options` - confirm.
*
* @param {string} code - The current phonetic code
* @param {number} i - The current index in the word
* @returns {boolean} - True if the code length exceeds the specified limit, false otherwise
*/
protected exitEarly(code: string, i: number): boolean;
/**
* Adjusts the phonetic code, given a list of characters to be removed from it.
*
* NOTE(review): whether removal applies everywhere in the code or only at
* specific positions cannot be told from this declaration - confirm.
*
* @param {string} code - The phonetic code to be adjusted
* @param {string[]} chars - Characters to be removed from the code
* @returns {string} - The adjusted phonetic code
*/
protected adjustCode(code: string, chars: string[]): string;
/**
* Processes an array of words to generate their phonetic indices.
*
* This method iterates over each word, generates its phonetic code,
* and ensures that the resulting codes are of equal length.
*
* @param {string[]} words - An array of words to be processed
* @returns {string[]} - An array of phonetic indices for the input words
*/
protected loop(words: string[]): string[];
/**
* Asynchronously processes an array of words to generate their phonetic indices.
*
* Asynchronous counterpart of `loop`: iterates over each word, generates its
* phonetic code asynchronously, and ensures that the resulting codes are of
* equal length.
*
* @param {string[]} words - An array of words to be processed
* @returns {Promise<string[]>} - A promise that resolves to an array of phonetic indices for the input words
*/
protected loopAsync(words: string[]): Promise<string[]>;
/**
* Get the name of the phonetic algorithm.
*
* @returns {string} - The name of the algorithm
*/
getAlgoName(): string;
/**
* Generates a phonetic index for the given input string.
*
* NOTE(review): the return description implies the input is split into
* words; the splitting rule is not visible in this declaration - confirm.
*
* @param {string} input - The input string to be indexed
* @returns {string[]} - An array of phonetic indices for the input words
*/
getIndex(input: string): string[];
/**
* Asynchronously generates a phonetic index for the given input string.
*
* Asynchronous counterpart of `getIndex`.
*
* @param {string} input - The input string to be indexed
* @returns {Promise<string[]>} - A promise that resolves to an array of phonetic indices for the input words
*/
getIndexAsync(input: string): Promise<string[]>;
}
/**
* Phonetic registry service for managing phonetic implementations.
*
* This registry allows for dynamic registration and retrieval of phonetic classes,
* enabling the use of various phonetic algorithms in a consistent manner.
*
* Typed as `RegistryService<Phonetic>`; the available operations are defined by
* that type in `../utils/Types` and are not visible in this declaration.
*/
export declare const PhoneticRegistry: RegistryService<Phonetic>;
/**
* Type definition for the Phonetic class constructor.
*
* This type is used to create instances of the Phonetic class, allowing for
* dynamic instantiation of phonetic algorithms.
*
* It is a non-abstract construct signature, so it describes concrete subclasses
* of `Phonetic`; the abstract base class itself is not assignable to it.
* The rest parameter is `any[]` so that subclass constructors with arbitrary
* parameter lists remain assignable.
*/
export type PhoneticCls = new (...args: any[]) => Phonetic;
/**
* Phonetic mapping service.
*
* This service provides a simple interface to manage phonetic mappings across
* different phonetic algorithms. It allows adding, removing, checking existence,
* retrieving, and listing phonetic mappings for specified algorithms.
*
* Typed as `PhoneticMappingService`; the exact method names and signatures are
* defined by that type in `../utils/Types` and are not visible in this declaration.
*/
export declare const PhoneticMappingRegistry: PhoneticMappingService;