semantic-ds-toolkit
Version:
Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
102 lines • 3.28 kB
TypeScript
import { ShadowSemanticsLayer } from '../core/shadow-semantics';
import { CIDRegistry } from '../registry/cid-registry';
import { StatisticalAnalyzer } from '../inference/statistical-analyzer';
/**
 * Configuration accepted by `SemanticJoinOperator.semanticJoin`.
 *
 * `semanticJoin` takes this as `Partial<SemanticJoinOptions>`, so callers may
 * omit any field; the default values are not visible in this declaration file.
 *
 * NOTE(review): the per-field descriptions below are inferred from field names
 * and types only — confirm against the implementation.
 */
export interface SemanticJoinOptions {
/** Join key(s) for the left input: a single name or a list of names (presumably column names). */
leftOn: string | string[];
/** Join key(s) for the right input: a single name or a list of names (presumably column names). */
rightOn: string | string[];
/** Join kind; restricted to these four literals. */
how: 'inner' | 'left' | 'right' | 'outer';
/** Presumably the minimum match confidence required to keep a match — range/scale not specified here. */
confidenceThreshold: number;
/** Toggle for fuzzy matching (exact semantics live in the implementation). */
enableFuzzyMatching: boolean;
/** Presumably the similarity cut-off used when fuzzy matching is enabled — TODO confirm range. */
fuzzyThreshold: number;
/** Toggle for caching normalized values. */
cacheNormalizedValues: boolean;
/** Batch size; what is batched (rows, keys, ...) is not visible here. */
batchSize: number;
/** Toggle for automatic normalizer selection. */
autoSelectNormalizers: boolean;
/** Toggle for keeping the original columns in the output. */
preserveOriginalColumns: boolean;
}
/**
 * One left/right pairing, as listed in `SemanticJoinResult.matches`.
 */
export interface JoinMatchResult {
/** Index on the left side (presumably a row index into the left input — confirm). */
leftIndex: number;
/** Index on the right side (presumably a row index into the right input — confirm). */
rightIndex: number;
/** Confidence value for this pairing; scale is not specified in this declaration. */
confidence: number;
/** How the pairing was established; restricted to these three literals. */
matchType: 'exact' | 'normalized' | 'fuzzy';
/** Optional name of the normalizer involved; may be absent. */
normalizerUsed?: string;
/** Free-form, untyped extra data attached to the match. */
metadata: Record<string, any>;
}
/**
 * Value resolved by `SemanticJoinOperator.semanticJoin`.
 *
 * @typeParam T - Type of the joined payload in `data`; defaults to `any`.
 */
export interface SemanticJoinResult<T = any> {
/** The joined output, typed by the caller-supplied `T`. */
data: T;
/** Individual match records. */
matches: JoinMatchResult[];
/**
 * Timing and counter figures for the join.
 * NOTE(review): time units are not stated in this declaration — confirm
 * against the implementation before comparing values.
 */
performance: {
totalTime: number;
normalizationTime: number;
matchingTime: number;
joinTime: number;
cacheHits: number;
totalOperations: number;
};
/** Row counts and a confidence summary for the join. */
statistics: {
inputRowsLeft: number;
inputRowsRight: number;
outputRows: number;
matchedRows: number;
confidence: {
average: number;
median: number;
// String-keyed numeric map; the key format (bucket labels?) is not visible here — TODO confirm.
distribution: Record<string, number>;
};
};
}
/**
 * Call signature for a normalizer: accepts a value of any type and returns a
 * string.
 */
export interface NormalizerFunction {
(value: any): string;
}
/**
 * Fixed set of named normalizer slots, each holding a `NormalizerFunction`.
 *
 * The keys of this interface are the only names accepted by
 * `SemanticJoinOperator.addNormalizer` (its `name` parameter is typed as
 * `keyof NormalizerRegistry`).
 *
 * NOTE(review): what each normalizer does to its input is not visible in this
 * declaration file; the slot names suggest the kind of value each targets.
 */
export interface NormalizerRegistry {
email: NormalizerFunction;
phone: NormalizerFunction;
name: NormalizerFunction;
address: NormalizerFunction;
numeric: NormalizerFunction;
date: NormalizerFunction;
categorical: NormalizerFunction;
default: NormalizerFunction;
}
/**
 * Ambient declaration of the join operator class.
 *
 * Public surface: the constructor, `semanticJoin`, `clearCache`,
 * `getCacheStats` and `addNormalizer`. Everything marked `private` below is
 * emitted as a bare name only — its type, signature and behaviour are not
 * visible in this declaration file.
 */
export declare class SemanticJoinOperator {
// Private state (types elided in the declaration output).
private semanticsLayer;
private cidRegistry;
private statisticalAnalyzer;
private adapterRegistry;
private joinPlanner;
private confidenceCalculator;
private normalizers;
private cache;
private cacheStats;
private cacheEnabled;
/**
 * @param cidRegistry - Required CID registry instance.
 * @param semanticsLayer - Optional; behaviour when omitted is not visible here.
 * @param statisticalAnalyzer - Optional; behaviour when omitted is not visible here.
 */
constructor(cidRegistry: CIDRegistry, semanticsLayer?: ShadowSemanticsLayer, statisticalAnalyzer?: StatisticalAnalyzer);
/**
 * Joins `left` with `right` asynchronously.
 *
 * @typeParam T - Type of `data` in the resolved result; defaults to `any`.
 * @param left - Left input (untyped; accepted shapes are not visible here).
 * @param right - Right input (untyped; accepted shapes are not visible here).
 * @param options - Optional partial set of join options; defaults for
 *   omitted fields are defined by the implementation.
 * @returns A promise resolving to the join result.
 */
semanticJoin<T = any>(left: any, right: any, options?: Partial<SemanticJoinOptions>): Promise<SemanticJoinResult<T>>;
// Private methods (signatures elided in the declaration output).
private performSemanticMatch;
private calculateConfidenceScores;
private buildContextMap;
private identifyNormalizer;
private selectNormalizerFromCID;
private inferNormalizerFromConcept;
private resolveDominantContext;
private resolveRepresentativeStats;
private computeSemanticAlignment;
private computeContextualRelevance;
private createCompositeKey;
private normalizeWithCache;
private findFuzzyMatches;
private describeNormalizers;
private calculateStringSimilarity;
private levenshteinDistance;
private executeJoin;
private selectOptimalNormalizer;
private areTypesCompatible;
private resolveJoinColumns;
private initializeNormalizers;
/** Clears the operator's cache; returns nothing. */
clearCache(): void;
/**
 * Returns cache counters.
 *
 * @returns An object with `hits`, `misses` and `hitRate`
 *   (presumably hits / (hits + misses) — confirm against the implementation).
 */
getCacheStats(): {
hits: number;
misses: number;
hitRate: number;
};
/**
 * Registers a normalizer under one of the `NormalizerRegistry` slot names.
 *
 * @param name - Must be a key of `NormalizerRegistry`.
 * @param normalizer - Function to store for that slot (presumably replacing
 *   any existing one — confirm).
 */
addNormalizer(name: keyof NormalizerRegistry, normalizer: NormalizerFunction): void;
}
//# sourceMappingURL=semantic-join.d.ts.map