UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast and well performing package for calculating string similarity

173 lines (169 loc) 5.95 kB
// CmpStr v3.0.1 dev-052fa0c-250614 by Paul Köhler @komed3 / MIT License 'use strict'; var Phonetic = require('./Phonetic.cjs'); /** * Metaphone Phonetic Algorithm * src/phonetic/Metaphone.ts * * @see https://en.wikipedia.org/wiki/Metaphone * * Metaphone is a phonetic algorithm for indexing words by their English pronunciation. * It encodes words into a string of consonant symbols, allowing for the comparison of * words based on their pronunciation rather than their spelling. Metaphone is more * accurate than Soundex for English and is widely used in search, spell-checking, * and fuzzy matching. * * This implementation uses a mapping and a comprehensive ruleset to efficiently * transform input words into their Metaphone code. The algorithm drops or transforms * letters according to context-sensitive rules, and only retains vowels at the start. * * @module Phonetic/Metaphone * @author Paul Köhler (komed3) * @license MIT */ /** * Metaphone class extends the Phonetic class to implement the Metaphone phonetic algorithm. */ class Metaphone extends Phonetic.Phonetic { // Default options for the Metaphone phonetic algorithm static default = { map: 'en90', delimiter: ' ', length: -1, pad: '', dedupe: false }; /** * Constructor for the Metaphone class. * * Initializes the Metaphone phonetic algorithm with the mapping and options. * * @param {PhoneticOptions} [opt] - Options for the Metaphone phonetic algorithm */ constructor(opt = {}) { super('metaphone', opt); } /** * Generates the Metaphone code for a given word. * * @param {string} word - The input word to be converted into a Metaphone code * @returns {string} - The generated Metaphone code */ encode(word) { // Remove duplicate adjacent letters except for C word = word.replace(/([A-BD-Z])\1+/gi, (m, c) => (c === 'C' ? m : c)); // Use the base implementation for rule/mapping application return super.encode(word); } /** * Adjusts the Metaphone code by removing vowels except for the first letter. * * @param {string} code - The Metaphone code to be adjusted * @returns {string} - The adjusted Metaphone code */ adjustCode(code) { // Remove vowels except for the first letter return code.slice(0, 1) + code.slice(1).replace(/[AEIOU]/g, ''); } } // Register the Metaphone algorithm in the phonetic registry Phonetic.PhoneticRegistry.add('metaphone', Metaphone); /** * Register the Metaphone phonetic mapping for English. * * This version is based on the original BASIC implementation from 1990, * written by Lawrence Philips. * * @see https://gist.github.com/Rostepher/b688f709587ac145a0b3 */ Phonetic.PhoneticMappingRegistry.add('metaphone', 'en90', { map: { a: 'A', b: 'B', c: 'K', d: 'T', e: 'E', f: 'F', g: 'K', h: 'H', i: 'I', j: 'J', k: 'K', l: 'L', m: 'M', n: 'N', o: 'O', p: 'P', q: 'K', r: 'R', s: 'S', t: 'T', u: 'U', v: 'F', w: 'W', x: 'KS', y: 'Y', z: 'S' }, ruleset: [ // Drop the first letter if the string begins with `AE`, `GN`, `KN`, `PN` or `WR` { char: 'a', position: 'start', next: ['e'], code: '' }, { char: 'g', position: 'start', next: ['n'], code: '' }, { char: 'k', position: 'start', next: ['n'], code: '' }, { char: 'p', position: 'start', next: ['n'], code: '' }, { char: 'w', position: 'start', next: ['r'], code: '' }, // Drop `B` if after `M` at the end of the string { char: 'b', position: 'end', prev: ['m'], code: '' }, // `C` transforms into `X` if followed by `H` or `IA` { char: 'c', next: ['h'], prevNot: ['s'], code: 'X' }, { char: 'c', next: ['i'], next2: ['a'], code: 'X' }, // `C` transforms into `S` if followed by `E`, `I` or `Y` { char: 'c', next: ['e', 'i', 'y'], code: 'S' }, // `D` transforms into `J` if followed by `GE`, `GI` or `GY` { char: 'd', next: ['g'], next2: ['e', 'i', 'y'], code: 'J' }, // Drop `G` if followed by `H` and `H` is not at the end or before a vowel { char: 'g', next: ['h'], next2Not: ['', 'a', 'e', 'i', 'o', 'u'], code: '' }, // Drop `G` if followed by `N` or `NED` and is at the end of the string { char: 'g', trailing: 'n', code: '' }, { char: 'g', trailing: 'ned', code: '' }, // `G` transforms into `J` if before `E`, `I` or `Y` and is not a `GG` { char: 'g', next: ['e', 'i', 'y'], prevNot: ['g'], code: 'J' }, // Drop `H` if after a vowel and not before a vowel { char: 'h', prev: ['a', 'e', 'i', 'o', 'u'], nextNot: ['a', 'e', 'i', 'o', 'u'], code: '' }, // Drop `H` if after `C`, `G`, `P`, `S` or `T` { char: 'h', prev: ['c', 'g', 'p', 's', 't'], code: '' }, // Drop `K` if after `C` { char: 'k', prev: ['c'], code: '' }, // `PH` transforms into `F` { char: 'p', next: ['h'], code: 'F' }, // `S` transforms into `X` if followed by `H`, `IA` or `IO` { char: 's', next: ['h'], code: 'X' }, { char: 's', next: ['i'], next2: ['a', 'o'], code: 'X' }, // `T` transforms into `X` if followed by `IA` or `IO` { char: 't', next: ['i'], next2: ['a', 'o'], code: 'X' }, // `TH` transforms into `0` (zero) { char: 't', next: ['h'], code: '0' }, // Drop `T` if followed by `CH` { char: 't', next: ['c'], next2: ['h'], code: '' }, // Drop `W` if not followed by a vowel { char: 'w', nextNot: ['a', 'e', 'i', 'o', 'u'], code: '' }, // `WH` transforms into `W` if at the beginning of the string { char: 'h', leading: 'w', code: '' }, // `X` transforms into `S` if at the beginning { char: 'x', position: 'start', code: 'S' }, // Drop `Y` if not followed by a vowel { char: 'y', nextNot: ['a', 'e', 'i', 'o', 'u'], code: '' } ] }); exports.Metaphone = Metaphone; //# sourceMappingURL=Metaphone.cjs.map