cmpstr
Version:
CmpStr is a lightweight, fast, and well-performing package for calculating string similarity.
178 lines (177 loc) • 8.42 kB
TypeScript
/**
* StructuredData - Structured Data Processing Utility
* src/utils/StructuredData.ts
*
* This utility provides a factory for processing arrays of structured objects,
* enabling efficient lookups and comparisons on specific object properties.
*
* Features:
* - Support for arbitrary object structures and property keys
* - Flexible extraction and transformation of object properties
* - Batch comparison with original object reconstruction
* - Full TypeScript type safety with generics
* - Integration with CmpStr comparison methods
* - Optional "objects-only" output mode for minimal result structure
*
* @module Utils
* @name StructuredData
* @author Paul Köhler (komed3)
* @license MIT
*/
import type { CmpFnResult, CmpStrOptions, MetricRaw, StructuredDataBatchResult, StructuredDataOptions } from './Types';
/**
* The StructuredData class provides factory methods for processing arrays of
* structured objects with string comparison capabilities.
*
* Instances cannot be constructed directly (the constructor is private);
* use the static {@link StructuredData.create} factory instead.
*
* @template T - The type of objects in the data array
* @template R - The type of the metric raw result
*/
export declare class StructuredData<T = any, R = MetricRaw> {
/**
* Private field holding the instance data. Its type is not visible in this
* declaration; presumably the `T[]` array passed to `create()` - confirm
* against the implementation.
*/
private readonly data;
/**
* Private field holding the comparison key. Its type is not visible in this
* declaration; presumably the `keyof T` passed to `create()` - confirm
* against the implementation.
*/
private readonly key;
/**
* Creates a new StructuredData instance for processing structured data.
*
* @template T - The type of objects in the data array
* @template R - The type of the metric raw result
* @param {T[]} data - The array of objects to process
* @param {keyof T} key - The property key to extract for comparison
* @returns {StructuredData< T, R >} - A new class instance
*/
static create<T = any, R = MetricRaw>(data: T[], key: keyof T): StructuredData<T, R>;
/**
* Creates a new StructuredData instance.
*
* NOTE: The parameter list is not shown in this declaration because the
* constructor is private. The parameters documented below are carried over
* from the implementation's documentation.
*
* @param {T[]} data - The array of objects to process
* @param {keyof T} key - The property key to extract for comparison
*/
private constructor();
/**
* Extracts the values of a given property from another array of objects.
*
* @template A - The type of objects in the array
* @param {A[]} arr - The array to extract from
* @param {keyof A} key - The property key
* @returns {string[]} - Array of extracted strings
*/
private extractFrom;
/**
* Extracts string values from the data array using the configured key.
*
* @returns {string[]} - Array of extracted strings
*/
private extract;
/**
* Type guard to check if a value is MetricResultSingle<R>.
*
* @param {unknown} v - The value to check
* @returns {v is MetricResultSingle< R >} - True if v is MetricResultSingle<R>
*/
private isMetricResult;
/**
* Type guard to check if a value is CmpStrResult & { raw?: R }.
*
* @param {unknown} v - The value to check
* @returns {v is CmpStrResult & { raw?: R }} - True if v is CmpStrResult & { raw?: R }
*/
private isCmpStrResult;
/**
* Normalizes metric results to a consistent format.
* Attaches original indices for correct mapping after sorting.
* Handles both CmpStrResult[] and MetricResultBatch<R> formats.
*
* @param {any} results - The raw metric results
* @returns {IndexedResult< R >[]} - Normalized results with indices
* @throws {CmpStrValidationError} - If the results format is unsupported
*/
private normalizeResults;
/**
* Rebuilds results with original objects attached.
* Maps results to source objects using target string matching with duplicate handling.
* Works correctly even when results are filtered or subset (e.g., from closest/furthest).
*
* @param {IndexedResult< R >[]} results - The normalized metric results
* @param {T[]} sourceData - The source data array for object attachment
* @param {string[]} extractedStrings - The extracted strings array for index mapping
* @param {boolean} [removeZero] - Whether to remove zero similarity results
* @param {boolean} [objectsOnly] - Return only objects without metadata
* @returns {StructuredDataResult< T, R >[] | T[]} - Results with objects (or just objects if objectsOnly=true)
*/
private rebuild;
/**
* Sorts results in-place by match score.
* Preserves __idx for tracking original positions.
*
* @param {IndexedResult< R >[]} results - The results to sort
* @param {string | boolean} [sort] - Sort direction (asc, desc, or boolean true=desc)
* @returns {IndexedResult< R >[]} - Sorted results
*/
private sort;
/**
* Finalizes the lookup process by normalizing, sorting, and rebuilding results.
*
* @param {CmpFnResult< R >} results - The raw metric results
* @param {string[]} extractedStrings - The extracted strings for index mapping
* @param {StructuredDataOptions} [opt] - Additional options
* @returns {StructuredDataBatchResult< T, R > | T[]} - The finalized lookup results
*/
private finalizeLookup;
/**
* Performs a lookup with a synchronous comparison function.
*
* @param {() => CmpFnResult< R >} fn - The comparison function
* @param {string[]} extractedStrings - The extracted strings for index mapping
* @param {StructuredDataOptions} [opt] - Additional options
* @returns {StructuredDataBatchResult< T, R > | T[]} - The lookup results
* @throws {CmpStrUsageError} - If the lookup process fails
*/
private performLookup;
/**
* Performs a lookup with an asynchronous comparison function.
*
* @param {() => Promise< CmpFnResult< R > >} fn - The async comparison function
* @param {string[]} extractedStrings - The extracted strings for index mapping
* @param {StructuredDataOptions} [opt] - Additional options
* @returns {Promise< StructuredDataBatchResult< T, R > | T[] >} - The async lookup results
* @throws {CmpStrUsageError} - If the async lookup process fails
*/
private performLookupAsync;
/**
* Performs a batch comparison against a query string.
*
* The comparison function receives a single string, an array of strings and
* optional CmpStr options (presumably the query, the strings extracted from
* the data array and the forwarded options - confirm against the implementation).
*
* @param {( a: string, b: string[], opt?: CmpStrOptions ) => CmpFnResult< R >} fn - The comparison function
* @param {string} query - The query string to compare against
* @param {StructuredDataOptions} [opt] - Optional lookup options
* @returns {StructuredDataBatchResult< T, R > | T[]} - Results with objects or just objects
*/
lookup(fn: (a: string, b: string[], opt?: CmpStrOptions) => CmpFnResult<R>, query: string, opt?: StructuredDataOptions): StructuredDataBatchResult<T, R> | T[];
/**
* Asynchronously performs a batch comparison against a query string.
*
* Asynchronous counterpart of `lookup()`: the comparison function has the same
* parameters but returns a Promise, and so does this method.
*
* @param {( a: string, b: string[], opt?: CmpStrOptions ) => Promise< CmpFnResult< R > >} fn - The async comparison function
* @param {string} query - The query string to compare against
* @param {StructuredDataOptions} [opt] - Optional lookup options
* @returns {Promise< StructuredDataBatchResult< T, R > | T[] >} - Async results
*/
lookupAsync(fn: (a: string, b: string[], opt?: CmpStrOptions) => Promise<CmpFnResult<R>>, query: string, opt?: StructuredDataOptions): Promise<StructuredDataBatchResult<T, R> | T[]>;
/**
* Performs a pairwise comparison against another array of objects.
*
* The comparison function receives two string arrays and optional CmpStr
* options (presumably the strings extracted from this instance's data and
* from `other` via `otherKey` - confirm against the implementation).
*
* @template O - The type of objects in the other array
* @param {( a: string[], b: string[], opt?: CmpStrOptions ) => CmpFnResult< R >} fn - The comparison function
* @param {O[]} other - The other array of objects
* @param {keyof O} otherKey - The property key in the other array
* @param {StructuredDataOptions} [opt] - Optional lookup options
* @returns {StructuredDataBatchResult< T, R > | T[]} - Results with objects or just objects
*/
lookupPairs<O = any>(fn: (a: string[], b: string[], opt?: CmpStrOptions) => CmpFnResult<R>, other: O[], otherKey: keyof O, opt?: StructuredDataOptions): StructuredDataBatchResult<T, R> | T[];
/**
* Asynchronously performs a pairwise comparison against another array of objects.
*
* Asynchronous counterpart of `lookupPairs()`: the comparison function has the
* same parameters but returns a Promise, and so does this method.
*
* @template O - The type of objects in the other array
* @param {( a: string[], b: string[], opt?: CmpStrOptions ) => Promise< CmpFnResult< R > >} fn - The async comparison function
* @param {O[]} other - The other array of objects
* @param {keyof O} otherKey - The property key in the other array
* @param {StructuredDataOptions} [opt] - Optional lookup options
* @returns {Promise< StructuredDataBatchResult< T, R > | T[] >} - Async results
*/
lookupPairsAsync<O = any>(fn: (a: string[], b: string[], opt?: CmpStrOptions) => Promise<CmpFnResult<R>>, other: O[], otherKey: keyof O, opt?: StructuredDataOptions): Promise<StructuredDataBatchResult<T, R> | T[]>;
}