cmpstr
Version:
CmpStr is a lightweight, fast and well performing package for calculating string similarity
337 lines (333 loc) • 13.5 kB
JavaScript
// CmpStr v3.0.1 dev-052fa0c-250614 by Paul Köhler @komed3 / MIT License
;
var CmpStr = require('./CmpStr.cjs');
var Normalizer = require('./utils/Normalizer.cjs');
var Filter = require('./utils/Filter.cjs');
var Registry = require('./utils/Registry.cjs');
/**
* CmpStrAsync Asynchronous API
* src/CmpStrAsync.ts
*
* The CmpStrAsync class provides a fully asynchronous, Promise-based interface for
* advanced string comparison, similarity measurement, phonetic indexing, filtering
* and normalization. It extends the CmpStr class and overrides all relevant methods
* to support non-blocking, scalable, and I/O-friendly workloads.
*
* Features:
* - Asynchronous normalization, filtering, and metric computation
* - Async batch, pairwise, and single string comparison with detailed results
* - Async phonetic indexing and phonetic-aware search and comparison
* - Full compatibility with the synchronous CmpStr API
* - Designed for large-scale, high-performance, and server-side applications
*
* @module CmpStrAsync
* @author Paul Köhler (komed3)
* @license MIT
*/
/**
 * The CmpStrAsync class provides a Promise-based API for string comparison,
 * phonetic indexing, filtering and normalization.
 *
 * It extends CmpStr and adds `*Async` counterparts which delegate to the
 * asynchronous entry points of the normalizer, the filter pipeline, the
 * phonetic algorithms and the metrics.
 *
 * @template R - The type of the metric result, defaults to MetricRaw
 */
class CmpStrAsync extends CmpStr.CmpStr {
    /**
     * --------------------------------------------------------------------------------
     * Instantiate the CmpStrAsync class
     * --------------------------------------------------------------------------------
     *
     * Methods to create a new CmpStrAsync instance with the given options.
     * Using the static `create` method is recommended to ensure proper instantiation.
     */

    /**
     * Creates a new CmpStrAsync instance with the given options.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     * @returns {CmpStrAsync<R>} - A new CmpStrAsync instance
     */
    static create(opt) {
        return new CmpStrAsync(opt);
    }

    /**
     * Creates a new CmpStrAsync instance by calling the super constructor.
     *
     * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
     */
    constructor(opt) {
        super(opt);
    }

    /**
     * ---------------------------------------------------------------------------------
     * Protected asynchronous utility methods for internal use
     * ---------------------------------------------------------------------------------
     *
     * These methods provide asynchronous normalization, filtering, and metric
     * computation capabilities, allowing for non-blocking operations.
     */

    /**
     * Asynchronously normalizes the input string or array using the provided flags,
     * falling back to the instance flags and finally to an empty flag string.
     *
     * @param {MetricInput} input - The input string or array
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @returns {Promise<MetricInput>} - The normalized input
     */
    async normalizeAsync(input, flags) {
        const effectiveFlags = flags ?? this.options.flags ?? '';
        return Normalizer.Normalizer.normalizeAsync(input, effectiveFlags);
    }

    /**
     * Asynchronously applies the filters registered for the given hook to the
     * input string or array.
     *
     * @param {MetricInput} input - The input string or array
     * @param {string} [hook='input'] - The filter hook
     * @returns {Promise<MetricInput>} - The filtered string(s)
     */
    async filterAsync(input, hook = 'input') {
        return Filter.Filter.applyAsync(hook, input);
    }

    /**
     * Asynchronously prepares the input: normalization (only if flags are set),
     * then the 'input' filter hook, then phonetic indexing (only if configured).
     *
     * @param {MetricInput} [input] - The input string or array
     * @param {CmpStrOptions} [opt] - Options to use instead of the instance options
     * @returns {Promise<MetricInput>} - The prepared input
     */
    async prepareAsync(input, opt) {
        const { flags, processors } = opt ?? this.options;
        let prepared = input;
        // Normalize the input using flags (i.e., 'itw')
        if (flags?.length) {
            prepared = await this.normalizeAsync(prepared, flags);
        }
        // Filter the input using hooked up filters
        prepared = await this.filterAsync(prepared, 'input');
        // Apply phonetic processors if configured
        if (processors?.phonetic) {
            prepared = await this.indexAsync(prepared, processors.phonetic);
        }
        return prepared;
    }

    /**
     * Asynchronously computes the phonetic index for the given input using
     * the specified phonetic algorithm. The index parts returned by the
     * algorithm are joined with `opt.delimiter` (a single space by default).
     *
     * @param {MetricInput} input - The input string or array
     * @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
     * @returns {Promise<MetricInput>} - The phonetic index for the given input
     */
    async indexAsync(input, { algo, opt }) {
        this.assert('phonetic', algo);
        const phonetic = Registry.factory.phonetic(algo, opt);
        const delimiter = opt?.delimiter ?? ' ';
        // Shared by the single-string and the array branch
        const toIndex = async (s) =>
            (await phonetic.getIndexAsync(s)).join(delimiter);
        return Array.isArray(input)
            ? Promise.all(input.map((s) => toIndex(s)))
            : toIndex(input);
    }

    /**
     * Asynchronously computes the metric result for the given inputs, applying
     * normalization and filtering as configured.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The first input string or array
     * @param {MetricInput} b - The second input string or array
     * @param {CmpStrOptions} [opt] - Optional options to use
     * @param {MetricMode} [mode] - The metric mode, forwarded to the metric's `runAsync`
     * @param {boolean} [raw] - Whether to return raw results; falls back to the resolved `raw` option
     * @param {boolean} [skip=false] - Whether to skip normalization and filtering
     * @returns {Promise<T>} - The computed metric result
     */
    async computeAsync(a, b, opt, mode, raw, skip) {
        const resolved = this.resolveOptions(opt);
        this.assert('metric', resolved.metric);
        // Prepare the inputs; both sides are independent, so run them concurrently
        const [A, B] = skip
            ? [a, b]
            : await Promise.all([
                  this.prepareAsync(a, resolved),
                  this.prepareAsync(b, resolved),
              ]);
        // Get the metric class
        const metric = Registry.factory.metric(resolved.metric, A, B, resolved.opt);
        // Pass the original inputs to the metric
        if (resolved.output !== 'prep') {
            metric.setOriginal(a, b);
        }
        // Compute the metric result
        await metric.runAsync(mode);
        // Post-process the results and concat the original inputs
        const result = this.postProcess(metric.getResults(), resolved);
        // Resolve and return the result based on the raw flag
        return this.output(result, raw ?? resolved.raw);
    }

    /**
     * ---------------------------------------------------------------------------------
     * Public asynchronous core methods for string comparison
     * ---------------------------------------------------------------------------------
     *
     * These methods provide the asynchronous core functionality for string comparison,
     * phonetic indexing and text search, allowing for non-blocking operations.
     */

    /**
     * Asynchronously performs a single metric comparison.
     *
     * @template T - The type of the metric result
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The metric result
     */
    async testAsync(a, b, opt) {
        return this.computeAsync(a, b, opt, 'single');
    }

    /**
     * Asynchronously performs a single metric comparison returning the numeric score
     * (the `res` property of the raw result).
     *
     * @param {string} a - The source string
     * @param {string} b - The target string
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<number>} - The similarity score (0..1)
     */
    async compareAsync(a, b, opt) {
        const { res } = await this.computeAsync(a, b, opt, 'single', true);
        return res;
    }

    /**
     * Asynchronously performs a batch metric comparison between source and target
     * strings or array of strings.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The batch metric results
     */
    async batchTestAsync(a, b, opt) {
        return this.computeAsync(a, b, opt, 'batch');
    }

    /**
     * Asynchronously performs a batch metric comparison and returns results sorted by score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The sorted batch results
     */
    async batchSortedAsync(a, b, dir = 'desc', opt) {
        // Always compute raw results so the numeric score is available for sorting
        const res = await this.computeAsync(a, b, opt, 'batch', true);
        const ascending = dir === 'asc';
        res.sort((x, y) => (ascending ? x.res - y.res : y.res - x.res));
        return this.output(res, opt?.raw ?? this.options.raw);
    }

    /**
     * Asynchronously performs a pairwise metric comparison between source and target
     * strings or array of strings.
     *
     * Input arrays need to be of the same length to perform pairwise comparison,
     * otherwise the method will throw an error.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The pairwise metric results
     */
    async pairsAsync(a, b, opt) {
        return this.computeAsync(a, b, opt, 'pairwise');
    }

    /**
     * Asynchronously performs a batch comparison and returns only results whose
     * score is greater than or equal to the threshold, sorted by descending score.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} threshold - The similarity threshold (0..1)
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The filtered batch results
     */
    async matchAsync(a, b, threshold, opt) {
        // Always compute raw results so the numeric score is available for filtering
        const res = await this.computeAsync(a, b, opt, 'batch', true);
        const matches = res
            .filter((r) => r.res >= threshold)
            .sort((x, y) => y.res - x.res);
        return this.output(matches, opt?.raw ?? this.options.raw);
    }

    /**
     * Asynchronously returns the n closest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of closest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The closest matches
     */
    async closestAsync(a, b, n = 1, opt) {
        const sorted = await this.batchSortedAsync(a, b, 'desc', opt);
        return sorted.slice(0, n);
    }

    /**
     * Asynchronously returns the n furthest matches from a batch comparison.
     *
     * @template T - The type of the metric result
     * @param {MetricInput} a - The source string or array of strings
     * @param {MetricInput} b - The target string or array of strings
     * @param {number} [n=1] - Number of furthest matches
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<T>} - The furthest matches
     */
    async furthestAsync(a, b, n = 1, opt) {
        const sorted = await this.batchSortedAsync(a, b, 'asc', opt);
        return sorted.slice(0, n);
    }

    /**
     * Asynchronously performs a normalized and filtered substring search.
     * Matching is done on the prepared values, while the returned entries are
     * the original (unprepared) haystack items.
     *
     * @param {string} needle - The search string
     * @param {string[]} haystack - The array to search in
     * @param {NormalizeFlags} [flags] - Normalization flags
     * @param {CmpStrProcessors} [processors] - Pre-processors to apply
     * @returns {Promise<string[]>} - Array of matching entries
     */
    async searchAsync(needle, haystack, flags, processors) {
        const resolved = this.resolveOptions({ flags, processors });
        // Prepare the needle and haystack concurrently, normalizing and filtering them
        const [test, hstk] = await Promise.all([
            this.prepareAsync(needle, resolved),
            this.prepareAsync(haystack, resolved),
        ]);
        // Filter the haystack based on the normalized test string
        return haystack.filter((_, i) => hstk[i].includes(test));
    }

    /**
     * Asynchronously computes a similarity matrix for the given input array.
     * Row `i` holds the scores of entry `i` compared against every entry.
     *
     * @param {string[]} input - The input array
     * @param {CmpStrOptions} [opt] - Optional options
     * @returns {Promise<number[][]>} - The similarity matrix
     */
    async matrixAsync(input, opt) {
        // Prepare all entries once so every row can skip preparation
        const prepared = await this.prepareAsync(input, this.resolveOptions(opt));
        return Promise.all(
            prepared.map(async (a) => {
                // Forward `opt` so per-call settings (e.g. the metric) are honoured
                // for the comparison as well, not only for the preparation step
                const row = await this.computeAsync(
                    a,
                    prepared,
                    opt,
                    'batch',
                    true,
                    true
                );
                return row.map((b) => b.res ?? 0);
            })
        );
    }

    /**
     * Asynchronously computes the phonetic index for a string using the
     * given algorithm and options, falling back to the phonetic processor
     * configured on the instance options.
     *
     * @param {string} [input] - The input string
     * @param {string} [algo] - The phonetic algorithm to use
     * @param {PhoneticOptions} [opt] - Optional phonetic options
     * @returns {Promise<string>} - The phonetic index as a string
     */
    async phoneticIndexAsync(input, algo, opt) {
        const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
        return this.indexAsync(input, { algo: algo ?? a, opt: opt ?? o });
    }
}
// Expose the class as a named CommonJS export
exports.CmpStrAsync = CmpStrAsync;
//# sourceMappingURL=CmpStrAsync.cjs.map