UNPKG

cmpstr

Version:

CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.

178 lines (177 loc) 8.42 kB
/**
 * StructuredData - Structured Data Processing Utility
 * src/utils/StructuredData.ts
 *
 * This utility provides a factory for processing arrays of structured objects,
 * enabling efficient lookups and comparisons on specific object properties.
 *
 * Features:
 *  - Support for arbitrary object structures and property keys
 *  - Flexible extraction and transformation of object properties
 *  - Batch comparison with original object reconstruction
 *  - Full TypeScript type safety with generics
 *  - Integration with CmpStr comparison methods
 *  - Optional "objects-only" output mode for minimal result structure
 *
 * NOTE: this file is an ambient declaration (`export declare class`); it carries
 * signatures only. Private members appear without types or parameters here, so
 * their documentation describes the implementation in src/utils/StructuredData.ts
 * and cannot be checked against this file alone.
 *
 * @module Utils
 * @name StructuredData
 * @author Paul Köhler (komed3)
 * @license MIT
 */
import type { CmpFnResult, CmpStrOptions, MetricRaw, StructuredDataBatchResult, StructuredDataOptions } from './Types';
/**
 * The StructuredData class provides factory methods for processing arrays of
 * structured objects with string comparison capabilities.
 *
 * Instances are obtained through the static `create` factory; the constructor
 * is declared private.
 *
 * @template T - The type of objects in the data array
 * @template R - The type of the metric raw result
 */
export declare class StructuredData<T = any, R = MetricRaw> {
    /** The array of objects to process (type not exposed by this declaration). */
    private readonly data;
    /** The property key used for extraction (type not exposed by this declaration). */
    private readonly key;
    /**
     * Creates a new StructuredData instance for processing structured data.
     *
     * @template T - The type of objects in the data array
     * @template R - The type of the metric raw result
     * @param {T[]} data - The array of objects to process
     * @param {keyof T} key - The property key to extract for comparison
     * @returns {StructuredData< T, R >} - A new class instance
     */
    static create<T = any, R = MetricRaw>(data: T[], key: keyof T): StructuredData<T, R>;
    /**
     * Creates a new StructuredData instance.
     *
     * The constructor is private, so this declaration lists no parameters;
     * use `StructuredData.create( data, key )` to obtain an instance.
     * NOTE(review): presumably the implementation receives the same `data` and
     * `key` arguments that `create` accepts — confirm against the source file.
     */
    private constructor();
    /**
     * Extracts properties from another array.
     *
     * @template A - The type of objects in the array
     * @param {A[]} arr - The array to extract from
     * @param {keyof A} key - The property key
     * @returns {string[]} - Array of extracted strings
     */
    private extractFrom;
    /**
     * Extracts string values from the data array using the configured key.
     *
     * @returns {string[]} - Array of extracted strings
     */
    private extract;
    /**
     * Type guard to check if a value is MetricResultSingle<R>.
     *
     * @param {unknown} v - The value to check
     * @returns {v is MetricResultSingle< R >} - True if v is MetricResultSingle<R>
     */
    private isMetricResult;
    /**
     * Type guard to check if a value is CmpStrResult & { raw?: R }.
     *
     * @param {unknown} v - The value to check
     * @returns {v is CmpStrResult & { raw?: R }} - True if v is CmpStrResult & { raw?: R }
     */
    private isCmpStrResult;
    /**
     * Normalizes metric results to a consistent format.
     * Attaches original indices for correct mapping after sorting.
     * Handles both CmpStrResult[] and MetricResultBatch<R> formats.
     *
     * @param {any} results - The raw metric results
     * @returns {IndexedResult< R >[]} - Normalized results with indices
     * @throws {CmpStrValidationError} - If the results format is unsupported
     */
    private normalizeResults;
    /**
     * Rebuilds results with original objects attached.
     * Maps results to source objects using target string matching with duplicate handling.
     * Works correctly even when results are filtered or subset (e.g., from closest/furthest).
     *
     * @param {IndexedResult< R >[]} results - The normalized metric results
     * @param {T[]} sourceData - The source data array for object attachment
     * @param {string[]} extractedStrings - The extracted strings array for index mapping
     * @param {boolean} [removeZero] - Whether to remove zero similarity results
     * @param {boolean} [objectsOnly] - Return only objects without metadata
     * @returns {StructuredDataResult< T, R >[] | T[]} - Results with objects (or just objects if objectsOnly=true)
     */
    private rebuild;
    /**
     * Sorts results in-place by match score.
     * Preserves __idx for tracking original positions.
     *
     * @param {IndexedResult< R >[]} results - The results to sort
     * @param {string | boolean} [sort] - Sort direction (asc, desc, or boolean true=desc)
     * @returns {IndexedResult< R >[]} - Sorted results
     */
    private sort;
    /**
     * Finalizes the lookup process by normalizing, sorting, and rebuilding results.
     *
     * @param {CmpFnResult< R >} results - The raw metric results
     * @param {string[]} extractedStrings - The extracted strings for index mapping
     * @param {StructuredDataOptions} [opt] - Additional options
     * @returns {StructuredDataBatchResult< T, R > | T[]} - The finalized lookup results
     */
    private finalizeLookup;
    /**
     * Performs a lookup with a synchronous comparison function.
     *
     * @param {() => CmpFnResult< R >} fn - The comparison function
     * @param {string[]} extractedStrings - The extracted strings for index mapping
     * @param {StructuredDataOptions} [opt] - Additional options
     * @returns {StructuredDataBatchResult< T, R > | T[]} - The lookup results
     * @throws {CmpStrUsageError} - If the lookup process fails
     */
    private performLookup;
    /**
     * Performs a lookup with an asynchronous comparison function.
     *
     * @param {() => Promise< CmpFnResult< R > >} fn - The async comparison function
     * @param {string[]} extractedStrings - The extracted strings for index mapping
     * @param {StructuredDataOptions} [opt] - Additional options
     * @returns {Promise< StructuredDataBatchResult< T, R > | T[] >} - The async lookup results
     * @throws {CmpStrUsageError} - If the async lookup process fails
     */
    private performLookupAsync;
    /**
     * Performs a batch comparison against a query string.
     *
     * NOTE(review): `fn` takes a single string and a string array; presumably it is
     * called with `query` and the strings extracted from the data — confirm against
     * the implementation.
     *
     * @param {( a: string, b: string[], opt?: CmpStrOptions ) => CmpFnResult< R >} fn - The comparison function
     * @param {string} query - The query string to compare against
     * @param {StructuredDataOptions} [opt] - Optional lookup options
     * @returns {StructuredDataBatchResult< T, R > | T[]} - Results with objects or just objects
     */
    lookup(fn: (a: string, b: string[], opt?: CmpStrOptions) => CmpFnResult<R>, query: string, opt?: StructuredDataOptions): StructuredDataBatchResult<T, R> | T[];
    /**
     * Asynchronously performs a batch comparison against a query string.
     *
     * Asynchronous counterpart of `lookup`: `fn` returns a promise and so does
     * this method.
     *
     * @param {( a: string, b: string[], opt?: CmpStrOptions ) => Promise< CmpFnResult< R > >} fn - The async comparison function
     * @param {string} query - The query string to compare against
     * @param {StructuredDataOptions} [opt] - Optional lookup options
     * @returns {Promise< StructuredDataBatchResult< T, R > | T[] >} - Async results
     */
    lookupAsync(fn: (a: string, b: string[], opt?: CmpStrOptions) => Promise<CmpFnResult<R>>, query: string, opt?: StructuredDataOptions): Promise<StructuredDataBatchResult<T, R> | T[]>;
    /**
     * Performs a pairwise comparison against another array of objects.
     *
     * NOTE(review): `fn` takes two string arrays; presumably these are the strings
     * extracted from this instance's data and from `other` via `otherKey` — confirm
     * the argument order against the implementation.
     *
     * @template O - The type of objects in the other array
     * @param {( a: string[], b: string[], opt?: CmpStrOptions ) => CmpFnResult< R >} fn - The comparison function
     * @param {O[]} other - The other array of objects
     * @param {keyof O} otherKey - The property key in the other array
     * @param {StructuredDataOptions} [opt] - Optional lookup options
     * @returns {StructuredDataBatchResult< T, R > | T[]} - Results with objects or just objects
     */
    lookupPairs<O = any>(fn: (a: string[], b: string[], opt?: CmpStrOptions) => CmpFnResult<R>, other: O[], otherKey: keyof O, opt?: StructuredDataOptions): StructuredDataBatchResult<T, R> | T[];
    /**
     * Asynchronously performs a pairwise comparison against another array of objects.
     *
     * Asynchronous counterpart of `lookupPairs`: `fn` returns a promise and so
     * does this method.
     *
     * @template O - The type of objects in the other array
     * @param {( a: string[], b: string[], opt?: CmpStrOptions ) => Promise< CmpFnResult< R > >} fn - The async comparison function
     * @param {O[]} other - The other array of objects
     * @param {keyof O} otherKey - The property key in the other array
     * @param {StructuredDataOptions} [opt] - Optional lookup options
     * @returns {Promise< StructuredDataBatchResult< T, R > | T[] >} - Async results
     */
    lookupPairsAsync<O = any>(fn: (a: string[], b: string[], opt?: CmpStrOptions) => Promise<CmpFnResult<R>>, other: O[], otherKey: keyof O, opt?: StructuredDataOptions): Promise<StructuredDataBatchResult<T, R> | T[]>;
}