cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity
165 lines (162 loc) • 5.69 kB
JavaScript
// CmpStr v3.2.2 build-bb61120-260311 by Paul Köhler @komed3 / MIT License
import { ErrorUtil } from './utils/Errors.mjs';
import { Filter } from './utils/Filter.mjs';
import { Normalizer } from './utils/Normalizer.mjs';
import { factory } from './utils/Registry.mjs';
import { CmpStr } from './CmpStr.mjs';
/**
 * Promise-based variant of {@link CmpStr}.
 *
 * Adds `*Async` methods that run normalization, filtering, phonetic indexing
 * and metric computation through the asynchronous entry points of the
 * underlying utilities. Helpers such as `assert`, `resolveOptions`,
 * `postProcess`, `output` and `structured` are not defined in this class and
 * are expected to be provided by the base class.
 */
class CmpStrAsync extends CmpStr {
  /**
   * Factory shortcut for `new CmpStrAsync(opt)`.
   *
   * @param {object} [opt] - Options forwarded to the constructor.
   * @returns {CmpStrAsync} A new instance.
   */
  static create(opt) {
    return new CmpStrAsync(opt);
  }

  /**
   * @param {object} [opt] - Options forwarded unchanged to the base class.
   */
  constructor(opt) {
    super(opt);
  }

  /**
   * Normalizes the input via `Normalizer.normalizeAsync`.
   *
   * @param {string|string[]} input - Value to normalize.
   * @param {string} [flags] - Normalization flags; falls back to
   *   `this.options.flags`, then to an empty string.
   * @returns {Promise<*>} Whatever the normalizer resolves to.
   */
  async normalizeAsync(input, flags) {
    return Normalizer.normalizeAsync(input, flags ?? this.options.flags ?? '');
  }

  /**
   * Runs the input through `Filter.applyAsync` for the given hook.
   *
   * @param {string|string[]} input - Value to filter.
   * @param {string} hook - Name of the filter hook to apply.
   * @returns {Promise<*>} Whatever the filter resolves to.
   */
  async filterAsync(input, hook) {
    return Filter.applyAsync(hook, input);
  }

  /**
   * Prepares an input for comparison: optional normalization, the `'input'`
   * filter hook, then optional phonetic indexing — in that order.
   *
   * @param {string|string[]} input - Raw input.
   * @param {object} [opt] - Options providing `flags` and `processors`;
   *   defaults to `this.options`.
   * @returns {Promise<*>} The prepared input.
   */
  async prepareAsync(input, opt) {
    const { flags, processors } = opt ?? this.options;
    let prepared = input;

    // Normalization is skipped entirely when no flags are set.
    if (flags?.length) {
      prepared = await this.normalizeAsync(prepared, flags);
    }

    prepared = await this.filterAsync(prepared, 'input');

    if (processors?.phonetic) {
      prepared = await this.indexAsync(prepared, processors.phonetic);
    }

    return prepared;
  }

  /**
   * Computes the phonetic index of a string or of every string in an array.
   * The index parts returned by the phonetic algorithm are joined with
   * `opt.delimiter` (a single space when not given).
   *
   * @param {string|string[]} input - Value(s) to index.
   * @param {{algo: string, opt?: object}} processor - Phonetic algorithm name
   *   and its options.
   * @returns {Promise<string|string[]>} Joined index, one per input string.
   */
  async indexAsync(input, { algo, opt }) {
    this.assert('phonetic', algo);

    const phonetic = factory['phonetic'](algo, opt);
    const delimiter = opt?.delimiter ?? ' ';

    // Shared by the scalar and the array path so both join identically.
    const toIndex = async (s) =>
      (await phonetic.getIndexAsync(s)).join(delimiter);

    return Array.isArray(input)
      ? Promise.all(input.map((s) => toIndex(s)))
      : toIndex(input);
  }

  /**
   * Core routine behind all comparison methods: resolves options, prepares
   * both inputs (unless `skip` is set), runs the metric in the requested mode
   * and passes the post-processed results through `this.output`.
   * The whole operation is executed inside `ErrorUtil.wrapAsync`.
   *
   * @param {string|string[]} a - First input.
   * @param {string|string[]} b - Second input.
   * @param {object} [opt] - Per-call options, resolved via `resolveOptions`.
   * @param {string} [mode] - Mode handed to the metric's `runAsync`
   *   (this class uses `'single'`, `'batch'` and `'pairwise'`).
   * @param {boolean} [raw] - Output flag; falls back to the resolved `raw`.
   * @param {boolean} [skip] - When truthy, `a` and `b` are used as-is without
   *   preparation.
   * @returns {Promise<*>} The formatted result, or `[]` when `safeEmpty` is
   *   set and either prepared input is `''` or an empty array.
   */
  async computeAsync(a, b, opt, mode, raw, skip) {
    const isEmpty = (v) => v === '' || (Array.isArray(v) && v.length === 0);

    return ErrorUtil.wrapAsync(
      async () => {
        const resolved = this.resolveOptions(opt);
        this.assert('metric', resolved.metric);

        // Inputs are prepared one after the other (a first, then b).
        const A = skip ? a : await this.prepareAsync(a, resolved);
        const B = skip ? b : await this.prepareAsync(b, resolved);

        if (resolved.safeEmpty && (isEmpty(A) || isEmpty(B))) {
          return [];
        }

        const metric = factory['metric'](resolved.metric, A, B, resolved.opt);

        // Unless the output is 'prep', the metric is given the unprepared inputs.
        if (resolved.output !== 'prep') metric.setOriginal(a, b);

        await metric.runAsync(mode);

        const result = this.postProcess(metric.getResults(), resolved);
        return this.output(result, raw ?? resolved.raw);
      },
      `Failed to compute metric <${opt?.metric ?? this.options.metric}> for the given inputs`,
      { a, b, opt }
    );
  }

  /**
   * Compares two inputs in `'single'` mode.
   *
   * @returns {Promise<*>} The formatted result of `computeAsync`.
   */
  async testAsync(a, b, opt) {
    return this.computeAsync(a, b, opt, 'single');
  }

  /**
   * Compares two inputs in `'single'` mode and returns only the `res`
   * property of the raw result.
   *
   * @returns {Promise<*>} The `res` value of the raw result.
   */
  async compareAsync(a, b, opt) {
    const result = await this.computeAsync(a, b, opt, 'single', true);
    return result.res;
  }

  /**
   * Runs a comparison in `'batch'` mode.
   *
   * @returns {Promise<*>} The formatted result of `computeAsync`.
   */
  async batchTestAsync(a, b, opt) {
    return this.computeAsync(a, b, opt, 'batch');
  }

  /**
   * Runs a `'batch'` comparison and sorts the raw results by `res`.
   *
   * @param {string|string[]} a - First input.
   * @param {string|string[]} b - Second input.
   * @param {string} [dir='desc'] - `'asc'` sorts ascending; any other value
   *   sorts descending.
   * @param {object} [opt] - Per-call options.
   * @returns {Promise<*>} Sorted results passed through `this.output`.
   */
  async batchSortedAsync(a, b, dir = 'desc', opt) {
    const results = await this.computeAsync(a, b, opt, 'batch', true);

    // Pick the comparator once instead of testing `dir` on every comparison.
    const byRes =
      dir === 'asc' ? (x, y) => x.res - y.res : (x, y) => y.res - x.res;

    return this.output(results.sort(byRes), opt?.raw ?? this.options.raw);
  }

  /**
   * Runs a comparison in `'pairwise'` mode.
   *
   * @returns {Promise<*>} The formatted result of `computeAsync`.
   */
  async pairsAsync(a, b, opt) {
    return this.computeAsync(a, b, opt, 'pairwise');
  }

  /**
   * Runs a `'batch'` comparison and keeps only results whose `res` is at
   * least `threshold`, sorted by `res` in descending order.
   *
   * @param {string|string[]} a - First input.
   * @param {string|string[]} b - Second input.
   * @param {number} threshold - Minimum `res` value (inclusive).
   * @param {object} [opt] - Per-call options.
   * @returns {Promise<*>} Matching results passed through `this.output`.
   */
  async matchAsync(a, b, threshold, opt) {
    const results = await this.computeAsync(a, b, opt, 'batch', true);
    const matches = results
      .filter((r) => r.res >= threshold)
      .sort((x, y) => y.res - x.res);

    return this.output(matches, opt?.raw ?? this.options.raw);
  }

  /**
   * Returns the first `n` entries of the descending batch result.
   *
   * @param {number} [n=1] - Number of entries to keep.
   * @returns {Promise<Array>} The leading `n` entries.
   */
  async closestAsync(a, b, n = 1, opt) {
    const sorted = await this.batchSortedAsync(a, b, 'desc', opt);
    return sorted.slice(0, n);
  }

  /**
   * Returns the first `n` entries of the ascending batch result.
   *
   * @param {number} [n=1] - Number of entries to keep.
   * @returns {Promise<Array>} The leading `n` entries.
   */
  async furthestAsync(a, b, n = 1, opt) {
    const sorted = await this.batchSortedAsync(a, b, 'asc', opt);
    return sorted.slice(0, n);
  }

  /**
   * Substring search: prepares the needle and the haystack with the same
   * options and returns the ORIGINAL haystack entries whose prepared form
   * includes the prepared needle.
   *
   * @param {string} needle - Value to look for.
   * @param {string[]} haystack - Candidates to search in.
   * @param {string} [flags] - Normalization flags for both sides.
   * @param {object} [processors] - Processors for both sides.
   * @returns {Promise<string[]>} Matching entries of `haystack`.
   */
  async searchAsync(needle, haystack, flags, processors) {
    const resolved = this.resolveOptions({ flags, processors });
    const test = await this.prepareAsync(needle, resolved);
    const hstk = await this.prepareAsync(haystack, resolved);

    return haystack.filter((_, i) => hstk[i].includes(test));
  }

  /**
   * Builds a matrix of `res` values by comparing every prepared entry against
   * the whole prepared list in `'batch'` mode (missing `res` becomes `0`).
   *
   * NOTE(review): `opt` is only used to prepare the input; the comparison
   * itself is invoked with `undefined` options — confirm this is intended.
   *
   * @param {string[]} input - Entries to compare with each other.
   * @param {object} [opt] - Options used for preparing the input.
   * @returns {Promise<number[][]>} One row of `res` values per entry.
   */
  async matrixAsync(input, opt) {
    const prepared = await this.prepareAsync(input, this.resolveOptions(opt));

    return Promise.all(
      prepared.map(async (a) => {
        // Inputs are already prepared, so preparation is skipped here.
        const row = await this.computeAsync(
          a,
          prepared,
          undefined,
          'batch',
          true,
          true
        );
        return row.map((b) => b.res ?? 0);
      })
    );
  }

  /**
   * Computes a phonetic index, falling back to the phonetic processor
   * configured in `this.options.processors` for a missing `algo` or `opt`.
   *
   * @param {string|string[]} input - Value(s) to index.
   * @param {string} [algo] - Phonetic algorithm name.
   * @param {object} [opt] - Phonetic algorithm options.
   * @returns {Promise<string|string[]>} Result of `indexAsync`.
   */
  async phoneticIndexAsync(input, algo, opt) {
    const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
    return this.indexAsync(input, { algo: algo ?? a, opt: opt ?? o });
  }

  /**
   * Structured lookup that uses `batchTestAsync` as the comparison callback
   * of `this.structured(data, key).lookupAsync`.
   */
  async structuredLookupAsync(query, data, key, opt) {
    return await this.structured(data, key).lookupAsync(
      (q, items, options) => this.batchTestAsync(q, items, options),
      query,
      opt
    );
  }

  /**
   * Structured lookup that uses `matchAsync` with the given `threshold`;
   * `sort: 'desc'` is added to the options passed to the lookup.
   */
  async structuredMatchAsync(query, data, key, threshold, opt) {
    return await this.structured(data, key).lookupAsync(
      (q, items, options) => this.matchAsync(q, items, threshold, options),
      query,
      { ...opt, sort: 'desc' }
    );
  }

  /**
   * Structured lookup that uses `closestAsync` with the given `n`;
   * `sort: 'desc'` is added to the options passed to the lookup.
   */
  async structuredClosestAsync(query, data, key, n = 1, opt) {
    return await this.structured(data, key).lookupAsync(
      (q, items, options) => this.closestAsync(q, items, n, options),
      query,
      { ...opt, sort: 'desc' }
    );
  }

  /**
   * Structured lookup that uses `furthestAsync` with the given `n`;
   * `sort: 'asc'` is added to the options passed to the lookup.
   */
  async structuredFurthestAsync(query, data, key, n = 1, opt) {
    return await this.structured(data, key).lookupAsync(
      (q, items, options) => this.furthestAsync(q, items, n, options),
      query,
      { ...opt, sort: 'asc' }
    );
  }

  /**
   * Structured pairwise lookup that uses `pairsAsync` as the comparison
   * callback of `this.structured(data, key).lookupPairsAsync`.
   */
  async structuredPairsAsync(data, key, other, otherKey, opt) {
    return await this.structured(data, key).lookupPairsAsync(
      (items, otherItems, options) =>
        this.pairsAsync(items, otherItems, options),
      other,
      otherKey,
      opt
    );
  }
}
export { CmpStrAsync };