// cmpstr
// Version: 3.2.2 (per the build banner below)
// CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.
// 83 lines (80 loc) • 2.83 kB
// JavaScript
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
import { ErrorUtil } from './Errors.mjs';
import { HashTable } from './HashTable.mjs';
/**
 * Flag-driven string normalizer with memoized pipelines and memoized results.
 *
 * Each character of a flag string switches on one transformation. Steps always
 * run in the fixed order listed below, regardless of the order in which the
 * flags are written:
 *
 *  - `d`: Unicode-normalize to NFD
 *  - `i`: convert to lower case
 *  - `k`: remove every character that is not a letter (`\p{L}`)
 *  - `n`: remove every character that is not a number (`\p{N}`)
 *  - `r`: collapse runs of the same character into one
 *  - `s`: remove everything that is not a letter, number or whitespace
 *  - `t`: trim leading and trailing whitespace
 *  - `u`: Unicode-normalize to NFC
 *  - `w`: collapse whitespace runs into a single space
 *  - `x`: Unicode-normalize to NFKC
 *
 * All work is routed through `ErrorUtil.wrap` / `ErrorUtil.wrapAsync`, which
 * receive the callback, a message and a context object (presumably to wrap
 * thrown errors — see Errors.mjs for the exact contract).
 */
class Normalizer {
  /** Compiled pipeline functions, keyed by canonical flag string. */
  static pipeline = new Map();

  /** Result cache; keys are produced by `HashTable#key(flags, [input])`. */
  static cache = new HashTable();

  /**
   * Patterns used by the pipeline steps. They carry the `g` flag but are only
   * ever used with `String.prototype.replace`, which starts matching from
   * index 0 for global patterns, so sharing them is safe.
   */
  static REGEX = {
    whitespace: /\s+/g,
    // `u` makes `.` match a whole code point, so doubled astral characters
    // (e.g. emoji) are collapsed like any other character.
    doubleChars: /(.)\1+/gu,
    specialChars: /[^\p{L}\p{N}\s]/gu,
    nonLetters: /[^\p{L}]/gu,
    // Negated class: matches everything that is NOT a number, mirroring
    // `nonLetters`, so that flag `n` keeps only the numbers.
    nonNumbers: /[^\p{N}]/gu
  };

  /**
   * Reduces a flag collection to its canonical form: duplicates removed and
   * the remaining flags sorted, so that e.g. `'ti'`, `'it'` and `'iit'` all
   * map to `'it'`.
   *
   * @param {Iterable<string>} flags - Flag characters (usually a string).
   * @returns {string} The de-duplicated, sorted flag string.
   */
  static canonicalFlags(flags) {
    return Array.from(new Set(flags)).sort().join('');
  }

  /**
   * Returns the (memoized) function that applies every step enabled by
   * `flags`, in the fixed step order.
   *
   * The memo key is the canonical flag string, so permutations or repetitions
   * of the same flags share a single pipeline.
   *
   * @param {string} flags - Normalization flags.
   * @returns {(s: string) => string} The composed normalization function.
   */
  static getPipeline(flags) {
    return ErrorUtil.wrap(
      () => {
        const key = Normalizer.canonicalFlags(flags);
        const cached = Normalizer.pipeline.get(key);
        if (cached) return cached;

        const { REGEX } = Normalizer;
        // Order matters: this list defines the execution order of the steps.
        const steps = [
          ['d', (s) => s.normalize('NFD')],
          ['i', (s) => s.toLowerCase()],
          ['k', (s) => s.replace(REGEX.nonLetters, '')],
          ['n', (s) => s.replace(REGEX.nonNumbers, '')],
          ['r', (s) => s.replace(REGEX.doubleChars, '$1')],
          ['s', (s) => s.replace(REGEX.specialChars, '')],
          ['t', (s) => s.trim()],
          ['u', (s) => s.normalize('NFC')],
          ['w', (s) => s.replace(REGEX.whitespace, ' ')],
          ['x', (s) => s.normalize('NFKC')]
        ];
        const active = steps
          .filter(([flag]) => key.includes(flag))
          .map(([, step]) => step);
        const fn = (s) => active.reduce((value, step) => step(value), s);

        Normalizer.pipeline.set(key, fn);
        return fn;
      },
      `Failed to create normalization pipeline for flags: ${flags}`,
      { flags }
    );
  }

  /**
   * Normalizes a string, or every element of an array, according to `flags`.
   *
   * The input is returned untouched when `flags` is empty or not a string, or
   * when `input` is falsy (e.g. the empty string). Arrays are mapped element
   * by element through this same method. Results for single values are stored
   * in and served from `Normalizer.cache` whenever the cache yields a key.
   *
   * Only `Normalizer.*` is referenced (never `this`), so the method keeps
   * working when it is detached from the class, e.g. passed as a callback.
   *
   * @param {string|string[]} input - Value(s) to normalize.
   * @param {string} flags - Normalization flags.
   * @returns {string|string[]} The normalized value(s).
   */
  static normalize(input, flags) {
    return ErrorUtil.wrap(
      () => {
        if (!flags || typeof flags !== 'string' || !input) return input;

        const canonical = Normalizer.canonicalFlags(flags);
        if (Array.isArray(input)) {
          return input.map((s) => Normalizer.normalize(s, canonical));
        }

        // A falsy key means "do not cache this value"; compute it directly.
        const key = Normalizer.cache.key(canonical, [input]);
        if (key && Normalizer.cache.has(key)) return Normalizer.cache.get(key);

        const res = Normalizer.getPipeline(canonical)(input);
        if (key) Normalizer.cache.set(key, res);
        return res;
      },
      `Failed to normalize input with flags: ${flags}`,
      { input, flags }
    );
  }

  /**
   * Promise-returning variant of {@link Normalizer.normalize}. The actual
   * normalization is performed by the synchronous method; this wrapper only
   * delivers the result (or failure) through a promise.
   *
   * @param {string|string[]} input - Value(s) to normalize.
   * @param {string} flags - Normalization flags.
   * @returns {Promise<string|string[]>} Resolves with the normalized value(s).
   */
  static async normalizeAsync(input, flags) {
    return await ErrorUtil.wrapAsync(
      async () => {
        if (!flags || typeof flags !== 'string' || !input) return input;
        return await (Array.isArray(input)
          ? Promise.all(input.map((s) => Normalizer.normalize(s, flags)))
          : Promise.resolve(Normalizer.normalize(input, flags)));
      },
      `Failed to asynchronously normalize input with flags: ${flags}`,
      { input, flags }
    );
  }

  /**
   * Empties both the pipeline memo and the result cache.
   */
  static clear() {
    Normalizer.pipeline.clear();
    Normalizer.cache.clear();
  }
}
export { Normalizer };