UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

337 lines (333 loc) 13.5 kB
// CmpStr v3.0.1 dev-052fa0c-250614 by Paul Köhler @komed3 / MIT License
'use strict';

var CmpStr = require('./CmpStr.cjs');
var Normalizer = require('./utils/Normalizer.cjs');
var Filter = require('./utils/Filter.cjs');
var Registry = require('./utils/Registry.cjs');

/**
 * CmpStrAsync Asynchronous API
 * src/CmpStrAsync.ts
 *
 * The CmpStrAsync class provides a fully asynchronous, Promise-based interface for
 * advanced string comparison, similarity measurement, phonetic indexing, filtering
 * and normalization. It extends the CmpStr class and adds an `*Async` counterpart
 * for the relevant methods to support non-blocking, scalable, and I/O-friendly
 * workloads.
 *
 * Features:
 *  - Asynchronous normalization, filtering, and metric computation
 *  - Async batch, pairwise, and single string comparison with detailed results
 *  - Async phonetic indexing and phonetic-aware search and comparison
 *  - Full compatibility with the synchronous CmpStr API
 *  - Designed for large-scale, high-performance, and server-side applications
 *
 * @module CmpStrAsync
 * @author Paul Köhler (komed3)
 * @license MIT
 */

/**
 * The CmpStrAsync class provides a fully asynchronous API for string comparison,
 * phonetic indexing, filtering and normalization.
 *
 * @template R - The type of the metric result, defaults to MetricRaw
 */
class CmpStrAsync extends CmpStr.CmpStr {
  /**
   * --------------------------------------------------------------------------------
   * Instantiate the CmpStrAsync class
   * --------------------------------------------------------------------------------
   *
   * Methods to create a new CmpStrAsync instance with the given options.
   * Using the static `create` method is recommended to ensure proper instantiation.
   */

  /**
   * Creates a new CmpStrAsync instance with the given options.
   *
   * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
   * @returns {CmpStrAsync<R>} - A new CmpStrAsync instance
   */
  static create(opt) {
    return new CmpStrAsync(opt);
  }

  /**
   * Creates a new CmpStrAsync instance by calling the super constructor.
   *
   * @param {string|CmpStrOptions} [opt] - Optional serialized or options object
   */
  constructor(opt) {
    super(opt);
  }

  /**
   * ---------------------------------------------------------------------------------
   * Protected asynchronous utility methods for internal use
   * ---------------------------------------------------------------------------------
   *
   * These methods provide asynchronous normalization, filtering, and metric
   * computation capabilities, allowing for non-blocking operations.
   */

  /**
   * Asynchronously normalizes the input string or array using the provided flags,
   * falling back to the instance's configured flags and finally to an empty
   * flag string.
   *
   * @param {MetricInput} input - The input string or array
   * @param {NormalizeFlags} [flags] - Normalization flags
   * @returns {Promise<MetricInput>} - The normalized input
   */
  async normalizeAsync(input, flags) {
    return Normalizer.Normalizer.normalizeAsync(
      input,
      flags ?? this.options.flags ?? ''
    );
  }

  /**
   * Asynchronously applies the filters of the given hook to the input string
   * or array.
   *
   * @param {MetricInput} input - The input string or array
   * @param {string} [hook='input'] - The filter hook
   * @returns {Promise<MetricInput>} - The filtered string(s)
   */
  async filterAsync(input, hook = 'input') {
    // The default implements the documented `[hook='input']` contract; without
    // it, omitting the argument forwarded `undefined` as the hook name.
    return Filter.Filter.applyAsync(hook, input);
  }

  /**
   * Asynchronously prepares the input by normalizing, filtering and (if
   * configured) phonetic indexing, in that order.
   *
   * @param {MetricInput} [input] - The input string or array
   * @param {CmpStrOptions} [opt] - Optional options to use instead of the instance options
   * @returns {Promise<MetricInput>} - The prepared input
   */
  async prepareAsync(input, opt) {
    const { flags, processors } = opt ?? this.options;
    let prepared = input;

    // Normalize the input using flags (i.e., 'itw'); skipped for empty flags
    if (flags?.length) {
      prepared = await this.normalizeAsync(prepared, flags);
    }

    // Filter the input using hooked up filters
    prepared = await this.filterAsync(prepared, 'input');

    // Apply phonetic processors if configured
    if (processors?.phonetic) {
      prepared = await this.indexAsync(prepared, processors.phonetic);
    }

    return prepared;
  }

  /**
   * Asynchronously computes the phonetic index for the given input using
   * the specified phonetic algorithm. The index parts of each string are joined
   * with `opt.delimiter` (a single space if not set).
   *
   * @param {MetricInput} input - The input string or array
   * @param {{ algo: string, opt?: PhoneticOptions }} options - The phonetic algorithm and options
   * @returns {Promise<MetricInput>} - The phonetic index for the given input
   */
  async indexAsync(input, { algo, opt }) {
    this.assert('phonetic', algo);

    const phonetic = Registry.factory.phonetic(algo, opt);
    const delimiter = opt?.delimiter ?? ' ';

    // Index a single string and collapse the resulting parts into one string
    const indexOne = (str) =>
      phonetic.getIndexAsync(str).then((parts) => parts.join(delimiter));

    return Array.isArray(input)
      ? Promise.all(input.map((str) => indexOne(str)))
      : indexOne(input);
  }

  /**
   * Asynchronously computes the metric result for the given inputs, applying
   * normalization and filtering as configured.
   *
   * @template T - The type of the metric result
   * @param {MetricInput} a - The first input string or array
   * @param {MetricInput} b - The second input string or array
   * @param {CmpStrOptions} [opt] - Optional options to use
   * @param {MetricMode} [mode] - The metric mode to use (forwarded to the metric as is)
   * @param {boolean} [raw] - Whether to return raw results; falls back to the resolved `raw` option
   * @param {boolean} [skip=false] - Whether to skip normalization and filtering
   * @returns {Promise<T>} - The computed metric result
   */
  async computeAsync(a, b, opt, mode, raw, skip) {
    const resolved = this.resolveOptions(opt);
    this.assert('metric', resolved.metric);

    // Prepare the input (unless the caller already did so)
    const A = skip ? a : await this.prepareAsync(a, resolved);
    const B = skip ? b : await this.prepareAsync(b, resolved);

    // Get the metric class
    const metric = Registry.factory.metric(resolved.metric, A, B, resolved.opt);

    // Pass the original inputs to the metric
    if (resolved.output !== 'prep') {
      metric.setOriginal(a, b);
    }

    // Compute the metric result
    await metric.runAsync(mode);

    // Post-process the results and concat the original inputs
    const result = this.postProcess(metric.getResults(), resolved);

    // Resolve and return the result based on the raw flag
    return this.output(result, raw ?? resolved.raw);
  }

  /**
   * ---------------------------------------------------------------------------------
   * Public asynchronous core methods for string comparison
   * ---------------------------------------------------------------------------------
   *
   * These methods provide the asynchronous core functionality for string comparison,
   * phonetic indexing and text search, allowing for non-blocking operations.
   */

  /**
   * Asynchronously performs a single metric comparison.
   *
   * @template T - The type of the metric result
   * @param {string} a - The source string
   * @param {string} b - The target string
   * @param {CmpStrOptions} [opt] - Optional options
   * @returns {Promise<T>} - The metric result
   */
  async testAsync(a, b, opt) {
    return this.computeAsync(a, b, opt, 'single');
  }

  /**
   * Asynchronously performs a single metric comparison returning the numeric score.
   *
   * @param {string} a - The source string
   * @param {string} b - The target string
   * @param {CmpStrOptions} [opt] - Optional options
   * @returns {Promise<number>} - The similarity score (0..1)
   */
  async compareAsync(a, b, opt) {
    // Raw output is forced so that the `res` field can be read directly
    const result = await this.computeAsync(a, b, opt, 'single', true);
    return result.res;
  }

  /**
   * Asynchronously performs a batch metric comparison between source and target
   * strings or array of strings.
   *
   * @template T - The type of the metric result
   * @param {MetricInput} a - The source string or array of strings
   * @param {MetricInput} b - The target string or array of strings
   * @param {CmpStrOptions} [opt] - Optional options
   * @returns {Promise<T>} - The batch metric results
   */
  async batchTestAsync(a, b, opt) {
    return this.computeAsync(a, b, opt, 'batch');
  }

  /**
   * Asynchronously performs a batch metric comparison and returns results sorted by score.
   *
   * @template T - The type of the metric result
   * @param {MetricInput} a - The source string or array of strings
   * @param {MetricInput} b - The target string or array of strings
   * @param {'desc'|'asc'} [dir='desc'] - Sort direction (desc, asc)
   * @param {CmpStrOptions} [opt] - Optional options
   * @returns {Promise<T>} - The sorted batch results
   */
  async batchSortedAsync(a, b, dir = 'desc', opt) {
    const res = await this.computeAsync(a, b, opt, 'batch', true);

    // Any direction other than 'asc' sorts descending
    const byScore =
      dir === 'asc' ? (x, y) => x.res - y.res : (x, y) => y.res - x.res;

    return this.output(res.sort(byScore), opt?.raw ?? this.options.raw);
  }

  /**
   * Asynchronously performs a pairwise metric comparison between source and target
   * strings or array of strings.
   *
   * Input arrays need to be of the same length to perform pairwise comparison,
   * otherwise the method will throw an error.
   *
   * @template T - The type of the metric result
   * @param {MetricInput} a - The source string or array of strings
   * @param {MetricInput} b - The target string or array of strings
   * @param {CmpStrOptions} [opt] - Optional options
   * @returns {Promise<T>} - The pairwise metric results
   */
  async pairsAsync(a, b, opt) {
    return this.computeAsync(a, b, opt, 'pairwise');
  }

  /**
   * Asynchronously performs a batch comparison and returns only results whose
   * score is greater than or equal to the threshold, sorted by descending score.
   *
   * @template T - The type of the metric result
   * @param {MetricInput} a - The source string or array of strings
   * @param {MetricInput} b - The target string or array of strings
   * @param {number} threshold - The similarity threshold (0..1)
   * @param {CmpStrOptions} [opt] - Optional options
   * @returns {Promise<T>} - The filtered batch results
   */
  async matchAsync(a, b, threshold, opt) {
    const res = await this.computeAsync(a, b, opt, 'batch', true);
    const matches = res
      .filter((r) => r.res >= threshold)
      .sort((x, y) => y.res - x.res);

    return this.output(matches, opt?.raw ?? this.options.raw);
  }

  /**
   * Asynchronously returns the n closest matches from a batch comparison.
   *
   * @template T - The type of the metric result
   * @param {MetricInput} a - The source string or array of strings
   * @param {MetricInput} b - The target string or array of strings
   * @param {number} [n=1] - Number of closest matches
   * @param {CmpStrOptions} [opt] - Optional options
   * @returns {Promise<T>} - The closest matches
   */
  async closestAsync(a, b, n = 1, opt) {
    const sorted = await this.batchSortedAsync(a, b, 'desc', opt);
    return sorted.slice(0, n);
  }

  /**
   * Asynchronously returns the n furthest matches from a batch comparison.
   *
   * @template T - The type of the metric result
   * @param {MetricInput} a - The source string or array of strings
   * @param {MetricInput} b - The target string or array of strings
   * @param {number} [n=1] - Number of furthest matches
   * @param {CmpStrOptions} [opt] - Optional options
   * @returns {Promise<T>} - The furthest matches
   */
  async furthestAsync(a, b, n = 1, opt) {
    const sorted = await this.batchSortedAsync(a, b, 'asc', opt);
    return sorted.slice(0, n);
  }

  /**
   * Asynchronously performs a normalized and filtered substring search.
   *
   * @param {string} needle - The search string
   * @param {string[]} haystack - The array to search in
   * @param {NormalizeFlags} [flags] - Normalization flags
   * @param {CmpStrProcessors} [processors] - Pre-processors to apply
   * @returns {Promise<string[]>} - Array of matching (original, unprepared) entries
   */
  async searchAsync(needle, haystack, flags, processors) {
    const resolved = this.resolveOptions({ flags, processors });

    // Prepare the needle and haystack, normalizing and filtering them
    const test = await this.prepareAsync(needle, resolved);
    const hstk = await this.prepareAsync(haystack, resolved);

    // Match on the prepared entries but return the original ones
    return haystack.filter((_, i) => hstk[i].includes(test));
  }

  /**
   * Asynchronously computes a similarity matrix for the given input array.
   *
   * @param {string[]} input - The input array
   * @param {CmpStrOptions} [opt] - Optional options
   * @returns {Promise<number[][]>} - The similarity matrix
   */
  async matrixAsync(input, opt) {
    // Prepare once up front so every row can skip preparation
    const prepared = await this.prepareAsync(input, this.resolveOptions(opt));

    // NOTE(review): `opt` is only applied to the preparation step; each row is
    // computed with the instance options (`undefined` is passed as `opt`).
    // Confirm whether a per-call metric override is meant to be honoured here.
    return Promise.all(
      prepared.map(async (a) => {
        const row = await this.computeAsync(
          a,
          prepared,
          undefined,
          'batch',
          true,
          true
        );

        // Missing scores are reported as 0
        return row.map((b) => b.res ?? 0);
      })
    );
  }

  /**
   * Asynchronously computes the phonetic index for a string using the
   * given algorithm and options, falling back to the configured phonetic
   * processor of this instance.
   *
   * @param {string} [input] - The input string
   * @param {string} [algo] - The phonetic algorithm to use
   * @param {PhoneticOptions} [opt] - Optional phonetic options
   * @returns {Promise<string>} - The phonetic index as a string
   */
  async phoneticIndexAsync(input, algo, opt) {
    const { algo: a, opt: o } = this.options.processors?.phonetic ?? {};
    return this.indexAsync(input, { algo: algo ?? a, opt: opt ?? o });
  }
}

exports.CmpStrAsync = CmpStrAsync;
//# sourceMappingURL=CmpStrAsync.cjs.map