UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast and well-performing package for calculating string similarity

83 lines (80 loc) 2.83 kB
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
import { ErrorUtil } from './Errors.mjs';
import { HashTable } from './HashTable.mjs';

/**
 * Flag-driven string normalizer.
 *
 * A normalization request is described by a string of single-character flags.
 * Each flag enables one step; enabled steps always run in the fixed order
 * listed below, regardless of the order in which the flags were written:
 *
 *  - `d` :: `String.prototype.normalize('NFD')`
 *  - `i` :: lower-case the string
 *  - `k` :: remove every character that is not a Unicode letter
 *  - `n` :: remove every match of `REGEX.nonNumbers` (see the note on that pattern)
 *  - `r` :: collapse a run of the same character into a single occurrence
 *  - `s` :: remove everything that is not a letter, a number or whitespace
 *  - `t` :: trim leading and trailing whitespace
 *  - `u` :: `String.prototype.normalize('NFC')`
 *  - `w` :: replace each run of whitespace with a single space
 *  - `x` :: `String.prototype.normalize('NFKC')`
 *
 * Compiled pipelines and normalized results are memoized in static,
 * class-wide stores; use `Normalizer.clear()` to reset them.
 */
class Normalizer {
  /** Compiled pipeline functions, keyed by flag string. */
  static pipeline = new Map();

  /** Memoized normalization results, keyed via `HashTable#key(flags, [input])`. */
  static cache = new HashTable();

  /**
   * Patterns used by the individual steps. They carry the `g` flag and are
   * only ever passed to `String.prototype.replace`, which starts every global
   * replacement from index 0, so sharing the instances is safe.
   */
  static REGEX = {
    whitespace: /\s+/g,
    // `u` makes `.` match whole code points, so doubled astral characters
    // (e.g. emoji) are collapsed just like doubled BMP characters.
    doubleChars: /(.)\1+/gu,
    specialChars: /[^\p{L}\p{N}\s]/gu,
    nonLetters: /[^\p{L}]/gu,
    // NOTE(review): despite its name this pattern matches number characters,
    // so the `n` step REMOVES numbers. If the intent is to keep only numbers
    // the pattern should be /[^\p{N}]/gu — confirm against the documented
    // contract of the `n` flag before changing, as callers may rely on this.
    nonNumbers: /\p{N}/gu
  };

  /**
   * Brings a flag string into canonical form: duplicates removed and the
   * remaining characters sorted, e.g. `'tiit'` becomes `'it'`.
   *
   * @param {string} flags - Flag characters in any order.
   * @returns {string} De-duplicated, sorted flag string.
   */
  static canonicalFlags(flags) {
    return [...new Set(flags)].sort().join('');
  }

  /**
   * Returns the function that applies all steps enabled by `flags`, building
   * and memoizing it on first use.
   *
   * String flags are canonicalized before being used as the memoization key,
   * so permutations such as `'it'` and `'ti'` share a single pipeline; the
   * resulting function is the same either way because the step order is fixed.
   *
   * Runs inside `ErrorUtil.wrap` with a descriptive message and the flags as
   * metadata (presumably failures are re-thrown as a project error — see
   * `Errors.mjs`).
   *
   * @param {string} flags - Flag characters selecting the steps to apply.
   * @returns {(s: string) => string} The composed normalization function.
   */
  static getPipeline(flags) {
    return ErrorUtil.wrap(
      () => {
        // Non-string input keeps the original behaviour (used as key as-is).
        const key = typeof flags === 'string' ? Normalizer.canonicalFlags(flags) : flags;

        if (Normalizer.pipeline.has(key)) {
          return Normalizer.pipeline.get(key);
        }

        const { REGEX } = Normalizer;

        // The order of this table defines the order in which steps execute.
        const steps = [
          ['d', (s) => s.normalize('NFD')],
          ['i', (s) => s.toLowerCase()],
          ['k', (s) => s.replace(REGEX.nonLetters, '')],
          ['n', (s) => s.replace(REGEX.nonNumbers, '')],
          ['r', (s) => s.replace(REGEX.doubleChars, '$1')],
          ['s', (s) => s.replace(REGEX.specialChars, '')],
          ['t', (s) => s.trim()],
          ['u', (s) => s.normalize('NFC')],
          ['w', (s) => s.replace(REGEX.whitespace, ' ')],
          ['x', (s) => s.normalize('NFKC')]
        ];

        const enabled = steps
          .filter(([flag]) => flags.includes(flag))
          .map(([, step]) => step);

        const run = (s) => enabled.reduce((value, step) => step(value), s);

        Normalizer.pipeline.set(key, run);
        return run;
      },
      `Failed to create normalization pipeline for flags: ${flags}`,
      { flags }
    );
  }

  /**
   * Normalizes a string, or every element of an array, according to `flags`.
   *
   * The input is returned untouched when `flags` is empty or not a string, or
   * when `input` is falsy (e.g. `''`, `null`, `undefined`). Arrays are mapped
   * element by element through this same method. String results are memoized
   * in `Normalizer.cache` whenever the hash table yields a key for them.
   *
   * @param {string|string[]} input - Value(s) to normalize.
   * @param {string} flags - Flag characters selecting the steps to apply.
   * @returns {string|string[]} The normalized value(s).
   */
  static normalize(input, flags) {
    return ErrorUtil.wrap(
      () => {
        if (!flags || typeof flags !== 'string' || !input) {
          return input;
        }

        // Reference the class explicitly (not `this`) so the method also works
        // when invoked detached, matching every other member access here.
        const canonical = Normalizer.canonicalFlags(flags);

        if (Array.isArray(input)) {
          return input.map((s) => Normalizer.normalize(s, canonical));
        }

        // `key` may be falsy, in which case the result is simply not cached.
        const key = Normalizer.cache.key(canonical, [input]);

        if (key && Normalizer.cache.has(key)) {
          return Normalizer.cache.get(key);
        }

        const result = Normalizer.getPipeline(canonical)(input);

        if (key) {
          Normalizer.cache.set(key, result);
        }

        return result;
      },
      `Failed to normalize input with flags: ${flags}`,
      { input, flags }
    );
  }

  /**
   * Promise-returning variant of `normalize`.
   *
   * The actual work is still performed by the synchronous `normalize`; array
   * input is resolved through `Promise.all`, anything else is returned as a
   * resolved value. Runs inside `ErrorUtil.wrapAsync`.
   *
   * @param {string|string[]} input - Value(s) to normalize.
   * @param {string} flags - Flag characters selecting the steps to apply.
   * @returns {Promise<string|string[]>} Resolves with the normalized value(s).
   */
  static async normalizeAsync(input, flags) {
    return await ErrorUtil.wrapAsync(
      async () => {
        if (!flags || typeof flags !== 'string' || !input) {
          return input;
        }

        if (Array.isArray(input)) {
          return await Promise.all(input.map((s) => Normalizer.normalize(s, flags)));
        }

        return Normalizer.normalize(input, flags);
      },
      `Failed to asynchronously normalize input with flags: ${flags}`,
      { input, flags }
    );
  }

  /** Drops all memoized pipelines and all cached normalization results. */
  static clear() {
    Normalizer.pipeline.clear();
    Normalizer.cache.clear();
  }
}

export { Normalizer };