UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast and well-performing package for calculating string similarity

190 lines (189 loc) 8.07 kB
/**
 * Abstract Phonetic
 * src/phonetic/Phonetic.ts
 *
 * @see https://en.wikipedia.org/wiki/Phonetic_algorithm
 *
 * A phonetic algorithm is a method for indexing words according to their
 * pronunciation. When the algorithm relies on orthography, it is strongly
 * influenced by the spelling conventions of the language it was designed for:
 * since most phonetic algorithms were created for English, they tend to be
 * less effective for indexing words in other languages.
 *
 * Phonetic search has numerous applications. One of the earliest use cases was
 * trademark search, to verify that newly registered trademarks do not risk
 * infringing upon existing trademarks because of how they are pronounced.
 *
 * This module provides an abstract class for generating phonetic indices based
 * on mappings and rules. Concrete phonetic algorithms are implemented by
 * extending the abstract class.
 *
 * Everything in this file is an ambient declaration (`declare`): it describes
 * the public and protected shape of the module, not its implementation.
 *
 * @module Phonetic
 * @author Paul Köhler (komed3)
 * @license MIT
 */
import type { PhoneticMap, PhoneticOptions, RegistryService, PhoneticMappingService } from '../utils/Types';

/**
 * Abstract class representing a phonetic algorithm.
 *
 * The protected methods `applyRules`, `encode`, `mapChar`, `equalLen`, `word2Chars`,
 * `exitEarly`, `adjustCode`, `loop` and `loopAsync` can be overridden in subclasses
 * to implement specific phonetic algorithms.
 *
 * The public surface consists of `getAlgoName`, `getIndex`, `getIndexAsync`
 * and the static `clear`.
 *
 * @abstract
 */
export declare abstract class Phonetic {
    /**
     * Cache of indexed words; emptied by the static `clear()` method.
     * The declaration does not expose its type.
     */
    private static cache;

    /**
     * Default phonetic options.
     *
     * This object contains default settings for phonetic algorithms,
     * implemented in the subclass.
     */
    protected static default: PhoneticOptions;

    /**
     * NOTE(review): presumably holds the algorithm name passed to the
     * constructor and returned by `getAlgoName()` — confirm against the
     * implementation.
     */
    private readonly algo;

    /**
     * Options in effect for this instance.
     *
     * NOTE(review): presumably the constructor's `opt` combined with the
     * static `default` — confirm against the implementation.
     */
    protected readonly options: PhoneticOptions;

    /**
     * Phonetic mapping used by this instance.
     *
     * NOTE(review): presumably the mapping resolved by the constructor
     * (which throws if the requested mapping is not declared) — confirm.
     */
    protected readonly map: PhoneticMap;

    /**
     * Static method to clear the cache of indexed words.
     */
    static clear(): void;

    /**
     * Constructor for the Phonetic class.
     *
     * Initializes the phonetic algorithm with the specified options and mapping.
     *
     * @param {string} algo - The name of the algorithm (e.g. 'soundex')
     * @param {PhoneticOptions} [opt] - Options for the phonetic algorithm
     * @throws {Error} - If the requested mapping is not declared
     */
    constructor(algo: string, opt?: PhoneticOptions);

    /**
     * Applies phonetic rules to a character in a word context.
     *
     * This method is designed to be generic and efficient for all phonetic algorithms.
     * It checks all rule types (prev, next, prevNot, nextNot, position, etc.) and
     * returns either the appropriate code (string) or undefined.
     *
     * @param {string} char - The current character
     * @param {number} i - The current position within the word
     * @param {string[]} chars - The word as an array of characters
     * @param {number} charLen - The total length of the word
     * @returns {string|undefined} - The rule code or undefined if no rule applies
     */
    protected applyRules(char: string, i: number, chars: string[], charLen: number): string | undefined;

    /**
     * Generates the phonetic code for a given word.
     *
     * This method processes the word character by character, applying phonetic rules
     * and mappings to generate a phonetic code.
     *
     * @param {string} word - The input word to be converted into a phonetic code
     * @returns {string} - The generated phonetic code
     */
    protected encode(word: string): string;

    /**
     * Converts a character to its phonetic code based on the mapping and rules.
     *
     * @param {string} char - The current character
     * @param {number} i - The current position within the word
     * @param {string[]} chars - The word as an array of characters
     * @param {number} charLen - The total length of the word
     * @param {string|null} lastCode - The last code generated (to avoid duplicates)
     * @param {Record<string, string>} map - The phonetic mapping
     * @returns {string|undefined} - The phonetic code or undefined if no code applies
     */
    protected mapChar(char: string, i: number, chars: string[], charLen: number, lastCode: string | null, map: Record<string, string>): string | undefined;

    /**
     * Ensures the phonetic code has a fixed length by padding or truncating.
     *
     * NOTE(review): the target length and pad character are not visible in
     * this declaration; presumably they come from `options` — confirm.
     *
     * @param {string} input - The input string to be adjusted
     * @returns {string} - The adjusted string with fixed length
     */
    protected equalLen(input: string): string;

    /**
     * Converts a word into an array of characters.
     *
     * @param {string} word - The input word to be converted
     * @returns {string[]} - An array of characters from the input word
     */
    protected word2Chars(word: string): string[];

    /**
     * Determines whether to exit early based on the current phonetic code length.
     *
     * @param {string} code - The current phonetic code
     * @param {number} i - The current index in the word
     * @returns {boolean} - True if the code length exceeds the specified limit, false otherwise
     */
    protected exitEarly(code: string, i: number): boolean;

    /**
     * Adjusts the phonetic code by removing the given characters from it.
     *
     * @param {string} code - The phonetic code to be adjusted
     * @param {string[]} chars - Characters to be removed from the code
     * @returns {string} - The adjusted phonetic code
     */
    protected adjustCode(code: string, chars: string[]): string;

    /**
     * Processes an array of words to generate their phonetic indices.
     *
     * This method iterates over each word, generates its phonetic code,
     * and ensures that the resulting codes are of equal length.
     *
     * @param {string[]} words - An array of words to be processed
     * @returns {string[]} - An array of phonetic indices for the input words
     */
    protected loop(words: string[]): string[];

    /**
     * Asynchronously processes an array of words to generate their phonetic indices.
     *
     * This method iterates over each word, generates its phonetic code asynchronously,
     * and ensures that the resulting codes are of equal length.
     *
     * @param {string[]} words - An array of words to be processed
     * @returns {Promise<string[]>} - A promise that resolves to an array of phonetic indices for the input words
     */
    protected loopAsync(words: string[]): Promise<string[]>;

    /**
     * Get the name of the phonetic algorithm.
     *
     * @returns {string} - The name of the algorithm
     */
    getAlgoName(): string;

    /**
     * Generates a phonetic index for the given input string.
     *
     * @param {string} input - The input string to be indexed
     * @returns {string[]} - An array of phonetic indices for the input words
     */
    getIndex(input: string): string[];

    /**
     * Asynchronously generates a phonetic index for the given input string.
     *
     * @param {string} input - The input string to be indexed
     * @returns {Promise<string[]>} - A promise that resolves to an array of phonetic indices for the input words
     */
    getIndexAsync(input: string): Promise<string[]>;
}

/**
 * Phonetic registry service for managing phonetic implementations.
 *
 * This registry allows for dynamic registration and retrieval of phonetic classes,
 * enabling the use of various phonetic algorithms in a consistent manner.
 */
export declare const PhoneticRegistry: RegistryService<Phonetic>;

/**
 * Type definition for the Phonetic class constructor.
 *
 * This type is used to create instances of the Phonetic class, allowing for
 * dynamic instantiation of phonetic algorithms.
 *
 * The rest parameter is typed `any[]` so that a concrete subclass with any
 * constructor parameter list is assignable to this type.
 */
export type PhoneticCls = new (...args: any[]) => Phonetic;

/**
 * Phonetic Mapping Service
 *
 * This service provides a simple interface to manage phonetic mappings across
 * different phonetic algorithms. It allows adding, removing, checking existence,
 * retrieving, and listing phonetic mappings for specified algorithms.
 */
export declare const PhoneticMappingRegistry: PhoneticMappingService;