UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

165 lines (162 loc) 5.69 kB
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
import { ErrorUtil } from './utils/Errors.mjs';
import { Filter } from './utils/Filter.mjs';
import { Normalizer } from './utils/Normalizer.mjs';
import { factory } from './utils/Registry.mjs';
import { CmpStr } from './CmpStr.mjs';

/**
 * Promise-based extension of `CmpStr`.
 *
 * Every method defined here is `async` and delegates to the `*Async` entry
 * points of the imported helpers (`Normalizer.normalizeAsync`,
 * `Filter.applyAsync`, `getIndexAsync`, `runAsync`, `lookupAsync`, ...).
 *
 * The members `options`, `assert`, `resolveOptions`, `postProcess`, `output`
 * and `structured` are used but not defined in this file; they are expected to
 * be inherited from `CmpStr`.
 */
class CmpStrAsync extends CmpStr {
  /**
   * Factory shortcut for `new CmpStrAsync(opt)`.
   *
   * @param {object} [opt] - Options forwarded unchanged to the constructor.
   * @returns {CmpStrAsync} A new instance.
   */
  static create(opt) {
    return new CmpStrAsync(opt);
  }

  /**
   * @param {object} [opt] - Options forwarded unchanged to the `CmpStr` constructor.
   */
  constructor(opt) {
    super(opt);
  }

  /**
   * Normalizes the input through `Normalizer.normalizeAsync`.
   *
   * @param {string|string[]} input - Value to normalize (presumably a string or
   *   a list of strings; the accepted shapes are decided by `Normalizer`).
   * @param {string} [flags] - Normalization flags; falls back to
   *   `this.options.flags` and finally to the empty string.
   * @returns {Promise<*>} Whatever `Normalizer.normalizeAsync` resolves to.
   */
  async normalizeAsync(input, flags) {
    return Normalizer.normalizeAsync(input, flags ?? this.options.flags ?? '');
  }

  /**
   * Passes the input through `Filter.applyAsync` for the given hook.
   *
   * @param {*} input - Value to filter.
   * @param {string} hook - Hook name handed to `Filter.applyAsync`.
   * @returns {Promise<*>} Whatever `Filter.applyAsync` resolves to.
   */
  async filterAsync(input, hook) {
    return Filter.applyAsync(hook, input);
  }

  /**
   * Prepares an input for comparison. The steps run in this order:
   *  1. normalization, only when `flags` is non-empty;
   *  2. filtering with the `'input'` hook (always);
   *  3. phonetic indexing, only when `processors.phonetic` is set.
   *
   * @param {*} input - Raw input.
   * @param {object} [opt] - Object providing `flags` and `processors`; when it
   *   is nullish, `this.options` is used instead.
   * @returns {Promise<*>} The prepared input.
   */
  async prepareAsync(input, opt) {
    const { flags, processors } = opt ?? this.options;

    // Work on a local binding instead of reassigning the parameter.
    let prepared = input;

    if (flags?.length) {
      prepared = await this.normalizeAsync(prepared, flags);
    }

    prepared = await this.filterAsync(prepared, 'input');

    if (processors?.phonetic) {
      prepared = await this.indexAsync(prepared, processors.phonetic);
    }

    return prepared;
  }

  /**
   * Computes the phonetic index of the input and joins the index parts into a
   * single string per input value.
   *
   * @param {*} input - A single value or an array of values. Array elements are
   *   indexed concurrently via `Promise.all`.
   * @param {object} config - Phonetic configuration.
   * @param {string} config.algo - Phonetic algorithm name; validated with
   *   `this.assert('phonetic', algo)`.
   * @param {object} [config.opt] - Options passed to the phonetic factory.
   *   `opt.delimiter` (default `' '`) is used to join the index parts.
   * @returns {Promise<string|string[]>} Joined index, or one per array element.
   */
  async indexAsync(input, { algo, opt }) {
    this.assert('phonetic', algo);

    const phonetic = factory.phonetic(algo, opt);
    const delimiter = opt?.delimiter ?? ' ';

    // Single shared continuation for both the scalar and the array case.
    const joinIndex = (value) =>
      phonetic.getIndexAsync(value).then((parts) => parts.join(delimiter));

    return Array.isArray(input)
      ? Promise.all(input.map((value) => joinIndex(value)))
      : joinIndex(input);
  }

  /**
   * Core routine used by the public comparison methods: resolves the options,
   * prepares both inputs, runs the metric and formats the result.
   *
   * The whole computation is executed through `ErrorUtil.wrapAsync`, which also
   * receives a failure message and the `{ a, b, opt }` context.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {object} [opt] - Per-call options, resolved with `this.resolveOptions`.
   * @param {string} [mode] - Mode handed to `metric.runAsync`; this class uses
   *   `'single'`, `'batch'` and `'pairwise'`.
   * @param {boolean} [raw] - Passed to `this.output`; falls back to the resolved
   *   `raw` option when nullish.
   * @param {boolean} [skip] - When truthy, `a` and `b` are used as given and
   *   `prepareAsync` is not called.
   * @returns {Promise<*>} The value of `this.output(...)`, or an empty array when
   *   `safeEmpty` is enabled and either prepared input is `''` or `[]`.
   */
  async computeAsync(a, b, opt, mode, raw, skip) {
    const isEmpty = (value) =>
      value === '' || (Array.isArray(value) && value.length === 0);

    return ErrorUtil.wrapAsync(
      async () => {
        const resolved = this.resolveOptions(opt);
        this.assert('metric', resolved.metric);

        // Inputs are prepared one after the other, first `a`, then `b`.
        const A = skip ? a : await this.prepareAsync(a, resolved);
        const B = skip ? b : await this.prepareAsync(b, resolved);

        if (resolved.safeEmpty && (isEmpty(A) || isEmpty(B))) {
          return [];
        }

        const metric = factory.metric(resolved.metric, A, B, resolved.opt);

        // Unless the output mode is 'prep', hand the unprepared inputs to the metric.
        if (resolved.output !== 'prep') {
          metric.setOriginal(a, b);
        }

        await metric.runAsync(mode);

        const result = this.postProcess(metric.getResults(), resolved);
        return this.output(result, raw ?? resolved.raw);
      },
      `Failed to compute metric <${opt?.metric ?? this.options.metric}> for the given inputs`,
      { a, b, opt }
    );
  }

  /**
   * Runs the metric in `'single'` mode.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {object} [opt] - Per-call options.
   * @returns {Promise<*>} Result of `computeAsync`.
   */
  async testAsync(a, b, opt) {
    return this.computeAsync(a, b, opt, 'single');
  }

  /**
   * Runs the metric in `'single'` mode with raw output and returns only the
   * `res` property of the result.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {object} [opt] - Per-call options.
   * @returns {Promise<*>} The `res` property of the raw result.
   */
  async compareAsync(a, b, opt) {
    const result = await this.computeAsync(a, b, opt, 'single', true);
    return result.res;
  }

  /**
   * Runs the metric in `'batch'` mode.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {object} [opt] - Per-call options.
   * @returns {Promise<*>} Result of `computeAsync`.
   */
  async batchTestAsync(a, b, opt) {
    return this.computeAsync(a, b, opt, 'batch');
  }

  /**
   * Runs the metric in `'batch'` mode and sorts the raw results by `res`.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {string} [dir='desc'] - `'asc'` sorts ascending; any other value
   *   sorts descending.
   * @param {object} [opt] - Per-call options; `opt.raw` (falling back to
   *   `this.options.raw`) is passed to `this.output`.
   * @returns {Promise<*>} The sorted results passed through `this.output`.
   */
  async batchSortedAsync(a, b, dir = 'desc', opt) {
    const results = await this.computeAsync(a, b, opt, 'batch', true);

    // `results` was created for this call, so sorting it in place is safe.
    results.sort((x, y) => (dir === 'asc' ? x.res - y.res : y.res - x.res));

    return this.output(results, opt?.raw ?? this.options.raw);
  }

  /**
   * Runs the metric in `'pairwise'` mode.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {object} [opt] - Per-call options.
   * @returns {Promise<*>} Result of `computeAsync`.
   */
  async pairsAsync(a, b, opt) {
    return this.computeAsync(a, b, opt, 'pairwise');
  }

  /**
   * Runs the metric in `'batch'` mode, keeps the results whose `res` is at
   * least `threshold` and sorts them by `res` in descending order.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {number} threshold - Minimum `res` (inclusive) for a result to be kept.
   * @param {object} [opt] - Per-call options; `opt.raw` (falling back to
   *   `this.options.raw`) is passed to `this.output`.
   * @returns {Promise<*>} The filtered, sorted results passed through `this.output`.
   */
  async matchAsync(a, b, threshold, opt) {
    const results = await this.computeAsync(a, b, opt, 'batch', true);
    const matches = results
      .filter((entry) => entry.res >= threshold)
      .sort((x, y) => y.res - x.res);

    return this.output(matches, opt?.raw ?? this.options.raw);
  }

  /**
   * Returns the first `n` entries of the descending `batchSortedAsync` result.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {number} [n=1] - Number of entries to keep.
   * @param {object} [opt] - Per-call options.
   * @returns {Promise<Array>} At most `n` entries.
   */
  async closestAsync(a, b, n = 1, opt) {
    const sorted = await this.batchSortedAsync(a, b, 'desc', opt);
    return sorted.slice(0, n);
  }

  /**
   * Returns the first `n` entries of the ascending `batchSortedAsync` result.
   *
   * @param {*} a - First input.
   * @param {*} b - Second input.
   * @param {number} [n=1] - Number of entries to keep.
   * @param {object} [opt] - Per-call options.
   * @returns {Promise<Array>} At most `n` entries.
   */
  async furthestAsync(a, b, n = 1, opt) {
    const sorted = await this.batchSortedAsync(a, b, 'asc', opt);
    return sorted.slice(0, n);
  }

  /**
   * Returns the original haystack entries whose prepared form contains the
   * prepared needle (checked with `includes`).
   *
   * @param {*} needle - Value to look for.
   * @param {Array} haystack - Candidates; the returned entries are taken from
   *   this array, not from its prepared copy.
   * @param {string} [flags] - Normalization flags used for both inputs.
   * @param {object} [processors] - Processors used for both inputs.
   * @returns {Promise<Array>} The matching haystack entries.
   */
  async searchAsync(needle, haystack, flags, processors) {
    const resolved = this.resolveOptions({ flags, processors });
    const test = await this.prepareAsync(needle, resolved);
    const hstk = await this.prepareAsync(haystack, resolved);

    return haystack.filter((_, i) => hstk[i].includes(test));
  }

  /**
   * Builds a matrix of `res` values by comparing every prepared input against
   * the full prepared input list in `'batch'` mode.
   *
   * The inputs are prepared once up front; the per-row computations then run
   * with `skip = true` and without per-call options.
   *
   * @param {Array} input - Values to compare with each other.
   * @param {object} [opt] - Options used only for the preparation step.
   * @returns {Promise<Array<Array>>} One row per input; a nullish `res` becomes `0`.
   */
  async matrixAsync(input, opt) {
    const prepared = await this.prepareAsync(input, this.resolveOptions(opt));

    return Promise.all(
      prepared.map(async (row) => {
        const results = await this.computeAsync(
          row,
          prepared,
          undefined,
          'batch',
          true,
          true
        );
        return results.map((entry) => entry.res ?? 0);
      })
    );
  }

  /**
   * Computes the phonetic index of the input, falling back to the phonetic
   * processor configured in `this.options.processors` for missing arguments.
   *
   * @param {*} input - Value(s) to index.
   * @param {string} [algo] - Phonetic algorithm; defaults to the configured one.
   * @param {object} [opt] - Phonetic options; default to the configured ones.
   * @returns {Promise<string|string[]>} Result of `indexAsync`.
   */
  async phoneticIndexAsync(input, algo, opt) {
    const { algo: defaultAlgo, opt: defaultOpt } =
      this.options.processors?.phonetic ?? {};

    return this.indexAsync(input, {
      algo: algo ?? defaultAlgo,
      opt: opt ?? defaultOpt,
    });
  }

  /**
   * Calls `lookupAsync` on `this.structured(data, key)` with a callback that
   * runs `batchTestAsync`.
   *
   * @param {*} query - Query passed to `lookupAsync`.
   * @param {*} data - Data passed to `this.structured`.
   * @param {*} key - Key passed to `this.structured`.
   * @param {object} [opt] - Options passed to `lookupAsync` unchanged.
   * @returns {Promise<*>} Whatever `lookupAsync` resolves to.
   */
  async structuredLookupAsync(query, data, key, opt) {
    return await this.structured(data, key).lookupAsync(
      (q, items, options) => this.batchTestAsync(q, items, options),
      query,
      opt
    );
  }

  /**
   * Calls `lookupAsync` on `this.structured(data, key)` with a callback that
   * runs `matchAsync` with the given threshold; `sort` is forced to `'desc'`.
   *
   * @param {*} query - Query passed to `lookupAsync`.
   * @param {*} data - Data passed to `this.structured`.
   * @param {*} key - Key passed to `this.structured`.
   * @param {number} threshold - Threshold forwarded to `matchAsync`.
   * @param {object} [opt] - Options; spread into a copy with `sort: 'desc'`.
   * @returns {Promise<*>} Whatever `lookupAsync` resolves to.
   */
  async structuredMatchAsync(query, data, key, threshold, opt) {
    return await this.structured(data, key).lookupAsync(
      (q, items, options) => this.matchAsync(q, items, threshold, options),
      query,
      { ...opt, sort: 'desc' }
    );
  }

  /**
   * Calls `lookupAsync` on `this.structured(data, key)` with a callback that
   * runs `closestAsync`; `sort` is forced to `'desc'`.
   *
   * @param {*} query - Query passed to `lookupAsync`.
   * @param {*} data - Data passed to `this.structured`.
   * @param {*} key - Key passed to `this.structured`.
   * @param {number} [n=1] - Count forwarded to `closestAsync`.
   * @param {object} [opt] - Options; spread into a copy with `sort: 'desc'`.
   * @returns {Promise<*>} Whatever `lookupAsync` resolves to.
   */
  async structuredClosestAsync(query, data, key, n = 1, opt) {
    return await this.structured(data, key).lookupAsync(
      (q, items, options) => this.closestAsync(q, items, n, options),
      query,
      { ...opt, sort: 'desc' }
    );
  }

  /**
   * Calls `lookupAsync` on `this.structured(data, key)` with a callback that
   * runs `furthestAsync`; `sort` is forced to `'asc'`.
   *
   * @param {*} query - Query passed to `lookupAsync`.
   * @param {*} data - Data passed to `this.structured`.
   * @param {*} key - Key passed to `this.structured`.
   * @param {number} [n=1] - Count forwarded to `furthestAsync`.
   * @param {object} [opt] - Options; spread into a copy with `sort: 'asc'`.
   * @returns {Promise<*>} Whatever `lookupAsync` resolves to.
   */
  async structuredFurthestAsync(query, data, key, n = 1, opt) {
    return await this.structured(data, key).lookupAsync(
      (q, items, options) => this.furthestAsync(q, items, n, options),
      query,
      { ...opt, sort: 'asc' }
    );
  }

  /**
   * Calls `lookupPairsAsync` on `this.structured(data, key)` with a callback
   * that runs `pairsAsync`.
   *
   * @param {*} data - Data passed to `this.structured`.
   * @param {*} key - Key passed to `this.structured`.
   * @param {*} other - Second data set passed to `lookupPairsAsync`.
   * @param {*} otherKey - Key for the second data set.
   * @param {object} [opt] - Options passed to `lookupPairsAsync` unchanged.
   * @returns {Promise<*>} Whatever `lookupPairsAsync` resolves to.
   */
  async structuredPairsAsync(data, key, other, otherKey, opt) {
    return await this.structured(data, key).lookupPairsAsync(
      (items, otherItems, options) =>
        this.pairsAsync(items, otherItems, options),
      other,
      otherKey,
      opt
    );
  }
}

export { CmpStrAsync };