UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

102 lines 3.28 kB
import { ShadowSemanticsLayer } from '../core/shadow-semantics';
import { CIDRegistry } from '../registry/cid-registry';
import { StatisticalAnalyzer } from '../inference/statistical-analyzer';

/**
 * Settings accepted by {@link SemanticJoinOperator.semanticJoin}.
 *
 * The method takes a `Partial` of this interface, so callers may omit any
 * field. Default values are not visible in this declaration file.
 */
export interface SemanticJoinOptions {
  /** Join column name(s) on the left input. */
  leftOn: string | string[];
  /** Join column name(s) on the right input. */
  rightOn: string | string[];
  /** Join type, one of the four listed literals. */
  how: 'inner' | 'left' | 'right' | 'outer';
  /** NOTE(review): presumably the minimum confidence for accepting a match; confirm against the implementation. */
  confidenceThreshold: number;
  /** Toggles fuzzy matching. */
  enableFuzzyMatching: boolean;
  /** NOTE(review): presumably the similarity cut-off used when fuzzy matching is on; range not declared here. */
  fuzzyThreshold: number;
  /** Toggles caching of normalized values. */
  cacheNormalizedValues: boolean;
  /** Batch size; the unit (rows, keys, ...) is not stated in the declaration. */
  batchSize: number;
  /** Toggles automatic normalizer selection. */
  autoSelectNormalizers: boolean;
  /** Toggles preservation of the original columns in the output. */
  preserveOriginalColumns: boolean;
}

/** One left/right pairing reported in {@link SemanticJoinResult.matches}. */
export interface JoinMatchResult {
  /** Index on the left side of the pairing. */
  leftIndex: number;
  /** Index on the right side of the pairing. */
  rightIndex: number;
  /** Confidence value attached to the pairing; scale is not declared here. */
  confidence: number;
  /** How the pairing was established. */
  matchType: 'exact' | 'normalized' | 'fuzzy';
  /** Optional name of the normalizer involved in the match. */
  normalizerUsed?: string;
  /** Free-form extra information about the match. */
  metadata: Record<string, any>;
}

/**
 * Value resolved by {@link SemanticJoinOperator.semanticJoin}.
 *
 * @typeParam T - Caller-chosen type of the `data` payload.
 */
export interface SemanticJoinResult<T = any> {
  /** Result payload, typed by the caller through `T`. */
  data: T;
  /** Match records accompanying the result. */
  matches: JoinMatchResult[];
  /** Timing and cache counters; time units are not declared here (TODO confirm). */
  performance: {
    totalTime: number;
    normalizationTime: number;
    matchingTime: number;
    joinTime: number;
    cacheHits: number;
    totalOperations: number;
  };
  /** Row counts and a summary of match confidence. */
  statistics: {
    inputRowsLeft: number;
    inputRowsRight: number;
    outputRows: number;
    matchedRows: number;
    confidence: {
      average: number;
      median: number;
      /** Keyed counts; the bucket labels are defined by the implementation. */
      distribution: Record<string, number>;
    };
  };
}

/** Callable that turns an arbitrary value into a string. */
export interface NormalizerFunction {
  (value: any): string;
}

/** The fixed set of named normalizer slots. */
export interface NormalizerRegistry {
  email: NormalizerFunction;
  phone: NormalizerFunction;
  name: NormalizerFunction;
  address: NormalizerFunction;
  numeric: NormalizerFunction;
  date: NormalizerFunction;
  categorical: NormalizerFunction;
  default: NormalizerFunction;
}

/**
 * Operator exposing an asynchronous semantic join plus cache and normalizer
 * management.
 *
 * Private members appear by name only: this is declaration output, so their
 * types and signatures are not available here.
 */
export declare class SemanticJoinOperator {
  // ----- private state (names only) -----
  private semanticsLayer;
  private cidRegistry;
  private statisticalAnalyzer;
  private adapterRegistry;
  private joinPlanner;
  private confidenceCalculator;
  private normalizers;
  private cache;
  private cacheStats;
  private cacheEnabled;

  /**
   * @param cidRegistry - Required CID registry.
   * @param semanticsLayer - Optional shadow semantics layer.
   * @param statisticalAnalyzer - Optional statistical analyzer.
   */
  constructor(
    cidRegistry: CIDRegistry,
    semanticsLayer?: ShadowSemanticsLayer,
    statisticalAnalyzer?: StatisticalAnalyzer
  );

  // ----- public API -----

  /**
   * Joins `left` with `right`.
   *
   * @typeParam T - Type of the `data` field in the resolved result.
   * @param left - Left input; declared as `any`, accepted shapes are not visible here.
   * @param right - Right input; declared as `any`, accepted shapes are not visible here.
   * @param options - Optional partial {@link SemanticJoinOptions}.
   * @returns A promise resolving to a {@link SemanticJoinResult}.
   */
  semanticJoin<T = any>(
    left: any,
    right: any,
    options?: Partial<SemanticJoinOptions>
  ): Promise<SemanticJoinResult<T>>;

  /** Clears the operator's cache. */
  clearCache(): void;

  /**
   * Reports cache counters.
   *
   * @returns `hits`, `misses` and `hitRate`. NOTE(review): `hitRate` is
   * presumably hits / (hits + misses); confirm against the implementation.
   */
  getCacheStats(): {
    hits: number;
    misses: number;
    hitRate: number;
  };

  /**
   * Supplies a normalizer for one of the {@link NormalizerRegistry} slots.
   *
   * @param name - Slot to assign; must be a key of `NormalizerRegistry`.
   * @param normalizer - Function to use for that slot.
   */
  addNormalizer(name: keyof NormalizerRegistry, normalizer: NormalizerFunction): void;

  // ----- private methods (names only) -----
  private performSemanticMatch;
  private calculateConfidenceScores;
  private buildContextMap;
  private identifyNormalizer;
  private selectNormalizerFromCID;
  private inferNormalizerFromConcept;
  private resolveDominantContext;
  private resolveRepresentativeStats;
  private computeSemanticAlignment;
  private computeContextualRelevance;
  private createCompositeKey;
  private normalizeWithCache;
  private findFuzzyMatches;
  private describeNormalizers;
  private calculateStringSimilarity;
  private levenshteinDistance;
  private executeJoin;
  private selectOptimalNormalizer;
  private areTypesCompatible;
  private resolveJoinColumns;
  private initializeNormalizers;
}
//# sourceMappingURL=semantic-join.d.ts.map