UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast and well performing package for calculating string similarity

202 lines (199 loc) 6.71 kB
// CmpStr v3.0.3 build-462b952-250813 by Paul Köhler @komed3 / MIT License
import { PhoneticRegistry, PhoneticMappingRegistry, Phonetic } from './Phonetic.mjs';

/**
 * Caverphone Phonetic Algorithm
 * src/phonetic/Caverphone.ts
 *
 * @see https://en.wikipedia.org/wiki/Caverphone
 *
 * This module implements the Caverphone phonetic algorithm, which is designed
 * to encode words into a phonetic representation. The Caverphone algorithm is
 * used primarily in New Zealand and was developed to assist in the indexing of
 * names in genealogical databases.
 *
 * It converts words into a standardized phonetic code, allowing for variations
 * in spelling and pronunciation to be matched.
 *
 * Two pattern tables are registered below: 'en1' (Caverphone 1.0) and 'en2'
 * (Caverphone 2.0). Every rule in a table operates on the output of the rules
 * before it, so the ORDER of the entries is part of the algorithm. The tables
 * follow the rule order of the published specifications.
 *
 * NOTE(review): this relies on the base Phonetic implementation applying
 * `patterns` sequentially in array order — confirm against Phonetic.mjs.
 *
 * Working alphabet used by the tables (input is lowercased by `encode`):
 *  - lowercase letters: not yet finalised
 *  - uppercase letters: finalised code characters
 *  - '2': placeholder for a silent/dropped consonant, removed at the end
 *  - '3': placeholder for a non-initial vowel, removed at the end
 *
 * @module Phonetic/Caverphone
 * @author Paul Köhler (komed3)
 * @license MIT
 */

/**
 * Caverphone class extends the Phonetic class to implement the Caverphone phonetic algorithm.
 */
class Caverphone extends Phonetic {

    // Default options for the Caverphone phonetic algorithm
    static default = {
        map: 'en2',
        delimiter: ' ',
        length: -1,
        pad: '',
        dedupe: false
    };

    /**
     * Constructor for the Caverphone class.
     *
     * Registers the instance under the algorithm name 'caverphone' and hands
     * the options to the base class.
     *
     * @param {PhoneticOptions} [opt] - Options for the Caverphone phonetic algorithm
     */
    constructor(opt = {}) {
        super('caverphone', opt);
    }

    /**
     * Generates the Caverphone code for a given word.
     *
     * The pattern tables are written against lowercase a-z only, so the word
     * is stripped of every other character and lowercased before the base
     * implementation applies the rules.
     *
     * @param {string} word - The input word to be converted into a Caverphone code
     * @returns {string} - The generated Caverphone code
     */
    encode(word) {
        const normalized = word.replace(/[^A-Z]/gi, '').toLowerCase();

        // Use the base implementation for rule/mapping application
        return super.encode(normalized);
    }

    /**
     * Overrides the mapChar method to skip character mapping.
     *
     * Caverphone is expressed purely through the pattern tables (their `map`
     * is empty), so each character is passed through unchanged.
     *
     * @param {string} char - The character to be mapped
     * @returns {string} - The same character, unmodified
     */
    mapChar(char) {
        return char;
    }

    /**
     * Adjusts the phonetic code to uppercase.
     *
     * @param {string} code - The phonetic code to adjust
     * @returns {string} - The adjusted phonetic code
     */
    adjustCode(code) {
        return code.toUpperCase();
    }

}

// Register the Caverphone algorithm in the phonetic registry
PhoneticRegistry.add('caverphone', Caverphone);

// Register the Caverphone 1.0 phonetic mapping for English
PhoneticMappingRegistry.add('caverphone', 'en1', {
    options: { length: 6, pad: '1' },
    map: {},
    patterns: [
        // Special word-initial replacements
        { pattern: /^(c|r|t|en)ough/, replace: '$1ou2f' },
        { pattern: /^gn/, replace: '2n' },
        // Special word-final replacement
        { pattern: /mb$/, replace: 'm2' },
        // Character group replacements
        { pattern: /cq/g, replace: '2q' },
        { pattern: /c(e|i|y)/g, replace: 's$1' },
        { pattern: /tch/g, replace: '2ch' },
        { pattern: /[cqx]/g, replace: 'k' },
        { pattern: /v/g, replace: 'f' },
        { pattern: /dg/g, replace: '2g' },
        { pattern: /ti(a|o)/g, replace: 'si$1' },
        { pattern: /d/g, replace: 't' },
        { pattern: /ph/g, replace: 'fh' },
        { pattern: /b/g, replace: 'p' },
        { pattern: /sh/g, replace: 's2' },
        { pattern: /z/g, replace: 's' },
        // Vowel handling: initial vowel becomes 'A', every other vowel '3'
        { pattern: /^[aeiou]/, replace: 'A' },
        { pattern: /[aeiou]/g, replace: '3' },
        // Special gh handling
        { pattern: /3gh3/g, replace: '3kh3' },
        { pattern: /gh/g, replace: '22' },
        // Single character replacements
        { pattern: /g/g, replace: 'k' },
        // Collapse repeated consonants into their final (uppercase) form
        { pattern: /s+/g, replace: 'S' },
        { pattern: /t+/g, replace: 'T' },
        { pattern: /p+/g, replace: 'P' },
        { pattern: /k+/g, replace: 'K' },
        { pattern: /f+/g, replace: 'F' },
        { pattern: /m+/g, replace: 'M' },
        { pattern: /n+/g, replace: 'N' },
        // W handling: kept before a vowel or y, otherwise silent
        { pattern: /w3/g, replace: 'W3' },
        { pattern: /wy/g, replace: 'Wy' },
        { pattern: /wh3/g, replace: 'Wh3' },
        { pattern: /why/g, replace: 'Why' },
        { pattern: /w/g, replace: '2' },
        // H handling: initial h counts as a vowel sound, any other h is silent
        { pattern: /^h/, replace: 'A' },
        { pattern: /h/g, replace: '2' },
        // R handling: kept before a vowel or y, otherwise silent
        { pattern: /r3/g, replace: 'R3' },
        { pattern: /ry/g, replace: 'Ry' },
        { pattern: /r/g, replace: '2' },
        // L handling: kept before a vowel or y, otherwise silent
        { pattern: /l3/g, replace: 'L3' },
        { pattern: /ly/g, replace: 'Ly' },
        { pattern: /l/g, replace: '2' },
        // J/Y handling. This must come after the W/R/L rules above: once
        // 'y3' has become 'Y3' the 'ry'/'ly'/'wy' patterns can no longer
        // match, which would drop the leading consonant (e.g. "ryan").
        { pattern: /j/g, replace: 'y' },
        { pattern: /y3/g, replace: 'Y3' },
        { pattern: /y/g, replace: '2' },
        // Remove the placeholders
        { pattern: /[23]/g, replace: '' }
    ]
});

// Register the Caverphone 2.0 phonetic mapping for English
PhoneticMappingRegistry.add('caverphone', 'en2', {
    options: { length: 10, pad: '1' },
    map: {},
    patterns: [
        // Remove trailing 'e'
        { pattern: /e$/, replace: '' },
        // Special word-initial replacements
        { pattern: /^(c|r|t|en|tr)ough/, replace: '$1ou2f' },
        { pattern: /^gn/, replace: '2n' },
        // Special word-final replacement
        { pattern: /mb$/, replace: 'm2' },
        // Character group replacements
        { pattern: /cq/g, replace: '2q' },
        { pattern: /c(e|i|y)/g, replace: 's$1' },
        { pattern: /tch/g, replace: '2ch' },
        { pattern: /[cqx]/g, replace: 'k' },
        { pattern: /v/g, replace: 'f' },
        { pattern: /dg/g, replace: '2g' },
        { pattern: /ti(a|o)/g, replace: 'si$1' },
        { pattern: /d/g, replace: 't' },
        { pattern: /ph/g, replace: 'fh' },
        { pattern: /b/g, replace: 'p' },
        { pattern: /sh/g, replace: 's2' },
        { pattern: /z/g, replace: 's' },
        // Vowel handling: initial vowel becomes 'A', every other vowel '3'
        { pattern: /^[aeiou]/, replace: 'A' },
        { pattern: /[aeiou]/g, replace: '3' },
        // Y handling: y acts as a vowel unless it starts the word before a vowel
        { pattern: /j/g, replace: 'y' },
        { pattern: /^y3/, replace: 'Y3' },
        { pattern: /^y/, replace: 'A' },
        { pattern: /y/g, replace: '3' },
        // Special gh handling
        { pattern: /3gh3/g, replace: '3kh3' },
        { pattern: /gh/g, replace: '22' },
        // Single character replacements
        { pattern: /g/g, replace: 'k' },
        // Collapse repeated consonants into their final (uppercase) form
        { pattern: /s+/g, replace: 'S' },
        { pattern: /t+/g, replace: 'T' },
        { pattern: /p+/g, replace: 'P' },
        { pattern: /k+/g, replace: 'K' },
        { pattern: /f+/g, replace: 'F' },
        { pattern: /m+/g, replace: 'M' },
        { pattern: /n+/g, replace: 'N' },
        // W handling: kept before a vowel, vowel-like at the end, otherwise silent
        { pattern: /w3/g, replace: 'W3' },
        { pattern: /wh3/g, replace: 'Wh3' },
        { pattern: /w$/, replace: '3' },
        { pattern: /w/g, replace: '2' },
        // H handling: initial h counts as a vowel sound, any other h is silent
        { pattern: /^h/, replace: 'A' },
        { pattern: /h/g, replace: '2' },
        // R handling: kept before a vowel, vowel-like at the end, otherwise silent
        { pattern: /r3/g, replace: 'R3' },
        { pattern: /r$/, replace: '3' },
        { pattern: /r/g, replace: '2' },
        // L handling: kept before a vowel, vowel-like at the end, otherwise silent
        { pattern: /l3/g, replace: 'L3' },
        { pattern: /l$/, replace: '3' },
        { pattern: /l/g, replace: '2' },
        // Remove the silent placeholders BEFORE testing for a final vowel:
        // a word such as "sarah" ends in '32' at this point and must still
        // receive its trailing 'A'.
        { pattern: /2/g, replace: '' },
        // A final vowel is kept as 'A'
        { pattern: /3$/, replace: 'A' },
        // Remove the remaining vowel placeholders
        { pattern: /3/g, replace: '' }
    ]
});

export { Caverphone };
//# sourceMappingURL=Caverphone.mjs.map