semantic-ds-toolkit
Version:
Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
83 lines • 2.9 kB
TypeScript
import { SemanticJoinOptions } from './semantic-join';
import { DataFrameLike, SemanticContext } from '../core/shadow-semantics';
/**
 * Description of how a semantic join should be executed.
 *
 * This is the return type of `SemanticJoinPlanner.planOptimalJoin`.
 * Units of the numeric estimates are not stated in this declaration file.
 */
export interface JoinPlan {
  /** Join algorithm selected for this plan. */
  strategy:
    | 'hash_join'
    | 'nested_loop'
    | 'sort_merge'
    | 'broadcast_join';

  /** Estimated cost of running the plan (unit not specified here). */
  estimatedCost: number;

  /** Estimated number of rows in the join result. */
  estimatedRows: number;

  /** Labels of the optimizations attached to this plan. */
  optimizations: string[];

  /**
   * Which side(s) an index is built on, or `'none'`.
   * NOTE(review): exact meaning of `'dual_index'` is defined by the implementation — confirm there.
   */
  indexingStrategy: 'build_left' | 'build_right' | 'dual_index' | 'none';

  /** Batching settings for the join. */
  batchingStrategy: {
    /** Whether batching is turned on. */
    enabled: boolean;
    /** Batch size (presumably rows per batch — confirm against the implementation). */
    batchSize: number;
    /** Degree of parallelism. */
    parallelism: number;
  };

  /** Cache settings for the join. */
  cacheStrategy: {
    /** Whether the value cache is enabled. */
    enableValueCache: boolean;
    /** Whether the index cache is enabled. */
    enableIndexCache: boolean;
    /** Cache size (unit — entries or bytes — is not specified here). */
    cacheSize: number;
  };

  /** Normalization plan for the join columns on both sides. */
  normalizationPlan: NormalizationPlan;
}
/**
 * Normalization work planned for the two sides of a join.
 */
export interface NormalizationPlan {
  /** Per-column normalization plans for the left input. */
  leftColumns: ColumnNormalizationPlan[];

  /** Per-column normalization plans for the right input. */
  rightColumns: ColumnNormalizationPlan[];

  /** Whether normalization should be precomputed. */
  precomputeNormalization: boolean;

  /** Estimated cost of the normalization step (unit not specified here). */
  estimatedNormalizationCost: number;
}
/**
 * Normalization plan for a single column.
 *
 * Value ranges of the numeric fields are not stated in this declaration file.
 */
export interface ColumnNormalizationPlan {
  /** Name of the column this entry applies to. */
  column: string;

  /** Identifier of the normalizer to apply. */
  normalizer: string;

  /** Confidence score (presumably in [0, 1] — confirm against the implementation). */
  confidence: number;

  /** Estimated selectivity of the column. */
  selectivityEstimate: number;

  /** Estimated cardinality of the column. */
  cardinalityEstimate: number;

  /** Estimated cost of normalizing the column (unit not specified here). */
  costEstimate: number;
}
/**
 * Summary statistics describing one data frame.
 *
 * The three record fields are keyed by string (presumably column name — confirm
 * against the implementation).
 */
export interface DataFrameStatistics {
  /** Number of rows. */
  rowCount: number;

  /** Number of columns. */
  columnCount: number;

  /** Average row size (unit not specified here). */
  avgRowSize: number;

  /** Uniqueness ratio per key. */
  uniquenessRatios: Record<string, number>;

  /** Null ratio per key. */
  nullRatios: Record<string, number>;

  /** Data type name per key. */
  dataTypes: Record<string, string>;

  /** Estimated total size (unit not specified here). */
  estimatedSize: number;
}
/**
 * Estimate of how selective a join is, plus the reasoning behind it.
 */
export interface JoinSelectivityEstimate {
  /** Expected number of matches. */
  expectedMatches: number;

  /** Selectivity value (presumably a fraction — confirm against the implementation). */
  selectivity: number;

  /** Confidence in the estimate (range not specified here). */
  confidence: number;

  /** Human-readable notes explaining the estimate. */
  reasoning: string[];
}
/**
 * Planner that produces a `JoinPlan` for a pair of data frames.
 *
 * Only the constructor and `planOptimalJoin` are public. Private members are
 * listed by name only; their types and signatures are not part of this
 * declaration file.
 */
export declare class SemanticJoinPlanner {
  /** Cost model used by the planner (presumably a `JoinCostModel` — confirm in the implementation). */
  private costModel;

  /** Creates a planner; takes no arguments. */
  constructor();

  /**
   * Produces a join plan for the two inputs.
   *
   * @param leftDf - Left-hand data frame.
   * @param rightDf - Right-hand data frame.
   * @param leftContext - Semantic context (or `null`) per string key for the left input.
   * @param rightContext - Semantic context (or `null`) per string key for the right input.
   * @param options - Join options; every field is optional.
   * @returns The join plan.
   */
  planOptimalJoin(
    leftDf: DataFrameLike,
    rightDf: DataFrameLike,
    leftContext: Record<string, SemanticContext | null>,
    rightContext: Record<string, SemanticContext | null>,
    options: Partial<SemanticJoinOptions>,
  ): JoinPlan;

  // Private helpers: statistics and selectivity.
  private analyzeDataFrameStatistics;
  private estimateJoinSelectivity;

  // Private helpers: normalization planning.
  private planNormalization;
  private planColumnNormalization;

  // Private helpers: strategy, indexing, batching, caching and optimizations.
  private selectJoinStrategy;
  private planIndexing;
  private planBatching;
  private planCaching;
  private generateOptimizations;

  // Private helpers: miscellaneous checks and lookups.
  private shouldPrecomputeNormalization;
  private inferDataType;
  private estimateAverageValueSize;
  private areTypesCompatible;
  private resolveJoinColumns;
}
/**
 * Cost model for join plans.
 *
 * The private constants are declared by name only; their types and values
 * are not visible in this declaration file.
 */
declare class JoinCostModel {
  // Cost constants (values live in the implementation).
  private readonly NESTED_LOOP_COST;
  private readonly HASH_BUILD_COST;
  private readonly HASH_PROBE_COST;
  private readonly SORT_COST;
  private readonly MERGE_COST;
  private readonly NORMALIZATION_BASE_COST;

  /**
   * Computes a total cost figure for a candidate join.
   *
   * @param leftStats - Statistics for the left input.
   * @param rightStats - Statistics for the right input.
   * @param strategy - Join strategy name; typed as a plain `string` here.
   * @param selectivity - Selectivity estimate for the join.
   * @param normalizationPlan - Normalization plan for both sides.
   * @param batchingStrategy - Batching settings (`enabled`, `batchSize`, `parallelism`).
   * @returns The total cost as a number (unit not specified here).
   */
  calculateTotalCost(
    leftStats: DataFrameStatistics,
    rightStats: DataFrameStatistics,
    strategy: string,
    selectivity: JoinSelectivityEstimate,
    normalizationPlan: NormalizationPlan,
    batchingStrategy: {
      enabled: boolean;
      batchSize: number;
      parallelism: number;
    },
  ): number;
}

export { JoinCostModel };
//# sourceMappingURL=join-planner.d.ts.map