datapilot-cli
Version:
Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform
175 lines • 4.99 kB
TypeScript
/**
* Intelligent Chunking Engine
* Advanced chunk size adaptation based on data characteristics and system performance
*/
import { EventEmitter } from 'events';
/**
* Options accepted by the IntelligentChunker constructor and by
* getGlobalIntelligentChunker().
*
* Every field is optional. Default values, units and valid ranges are decided
* by the implementation and are not visible in this declaration file.
*/
export interface ChunkingOptions {
/** Baseline chunk size. NOTE(review): unit (bytes vs. rows) is not stated here — confirm. */
baseChunkSize?: number;
/** Presumably the smallest chunk size the chunker may choose — confirm against the implementation. */
minChunkSize?: number;
/** Presumably the largest chunk size the chunker may choose — confirm against the implementation. */
maxChunkSize?: number;
/** Presumably controls how strongly the chunk size reacts to changing conditions; scale not stated here. */
adaptationSensitivity?: number;
/** Presumably the number of recent measurements considered when judging performance — TODO confirm. */
performanceWindow?: number;
/** Per-factor weights; see ComplexityWeights for the individual factors. */
complexityWeights?: ComplexityWeights;
/** Presumably toggles the use of recorded performance data when choosing chunk sizes — confirm. */
enableLearning?: boolean;
/** Presumably caps how many learning records are retained — TODO confirm. */
maxLearningHistory?: number;
}
/**
* Numeric weights, one per factor, supplied through
* ChunkingOptions.complexityWeights. All six fields are required.
*
* NOTE(review): how the weights are combined, and whether they are expected
* to be normalised, is not visible in this declaration file.
*/
export interface ComplexityWeights {
/** Weight of the data-type factor. */
dataTypeComplexity: number;
/** Weight of the encoding factor. */
encodingComplexity: number;
/** Weight of the structural factor. */
structuralComplexity: number;
/** Weight of the content factor. */
contentComplexity: number;
/** Weight of the memory-pressure factor. */
memoryPressure: number;
/** Weight of the I/O-performance factor. */
ioPerformance: number;
}
/**
* Description of an input data set.
*
* IntelligentChunker.analyzeDataCharacteristics() resolves to this shape, and
* calculateOptimalChunkSize() and recordPerformance() take it as an argument.
*
* NOTE(review): units and value ranges are not stated in this declaration
* file; the per-field notes marked "presumably" need confirming.
*/
export interface DataCharacteristics {
/** Size of the file — presumably in bytes. */
fileSize: number;
/** Estimated number of rows. */
estimatedRows: number;
/** Average line length — presumably in characters or bytes; confirm which. */
averageLineLength: number;
/** Encoding name. NOTE(review): the set of possible values is not visible here. */
encoding: string;
/** Whether quoted fields are present. */
hasQuotedFields: boolean;
/** Whether escaped fields are present. */
hasEscapedFields: boolean;
/** Number of columns. */
columnCount: number;
/** Data type names. NOTE(review): vocabulary, and whether there is one entry per column, is not visible here. */
dataTypes: string[];
/** Density of null values — presumably a fraction between 0 and 1; confirm. */
nullDensity: number;
/** Compression ratio. NOTE(review): direction of the ratio is not visible here. */
compressionRatio: number;
}
/**
* Snapshot of system state.
*
* May be passed to IntelligentChunker.calculateOptimalChunkSize() as the
* optional second argument.
*
* NOTE(review): units and scales of the fields are not stated in this
* declaration file — confirm against the implementation.
*/
export interface SystemMetrics {
/** Memory pressure — presumably a normalised value; confirm scale. */
memoryPressure: number;
/** CPU usage — presumably a fraction or percentage; confirm which. */
cpuUsage: number;
/** I/O latency — unit not stated here. */
ioLatency: number;
/** Throughput — unit not stated here. */
throughput: number;
/** Error rate — presumably errors relative to work done; confirm definition. */
errorRate: number;
}
/**
* Result returned by IntelligentChunker.calculateOptimalChunkSize(): the
* chosen chunk size together with the explanation and the predictions that
* accompany it.
*/
export interface ChunkDecision {
/** The chosen chunk size. NOTE(review): unit is not stated here — confirm. */
chunkSize: number;
/** Reasoning entries, as strings, explaining the decision. */
reasoning: string[];
/** Confidence in the decision — presumably in the range 0..1; confirm. */
confidence: number;
/** The individual factors behind the decision. NOTE(review): how they combine into chunkSize is not visible here. */
adaptationFactors: {
dataComplexity: number;
systemPerformance: number;
memoryConstraint: number;
learningAdjustment: number;
};
/** Performance expected for the chosen chunk size; units are not stated here. */
expectedPerformance: {
processingTime: number;
memoryUsage: number;
throughput: number;
};
}
/**
* One learning record: a chunk size, the data and system conditions it
* relates to, the performance that was measured, and a satisfaction score.
*
* IntelligentChunker.exportLearningData() returns an array of these and
* importLearningData() accepts one; recordPerformance() takes the
* actualPerformance member type as its third parameter.
*
* NOTE(review): this interface is not exported although it appears in those
* public signatures, so consumers cannot refer to it by name. Consider
* exporting it from the source module.
*/
interface LearningData {
/** Chunk size this record relates to. */
chunkSize: number;
/** Data characteristics associated with this record. */
dataCharacteristics: DataCharacteristics;
/** System metrics associated with this record. */
systemMetrics: SystemMetrics;
/** Measured results; units are not stated in this declaration file. */
actualPerformance: {
processingTime: number;
memoryUsage: number;
throughput: number;
errorCount: number;
};
/** Satisfaction score. Presumably produced by the chunker's private calculateSatisfaction member (expected vs actual performance) — confirm. */
satisfaction: number;
}
/**
* Intelligent chunk size adaptation with machine learning.
*
* Public surface, as declared here:
* - analyzeDataCharacteristics() samples a file and resolves to a DataCharacteristics.
* - calculateOptimalChunkSize() turns a DataCharacteristics (plus optional SystemMetrics) into a ChunkDecision.
* - recordPerformance() records measured results for learning.
* - getLearningStats(), resetLearning(), exportLearningData() and importLearningData() inspect and manage the learning data.
*
* Extends Node's EventEmitter. NOTE(review): which events are emitted, if
* any, is not visible in this declaration file.
*
* Private members are declared by name only; their types and signatures are
* omitted from this declaration.
*/
export declare class IntelligentChunker extends EventEmitter {
private options;
private performanceHistory;
private recentDecisions;
private learningModel;
private systemBaseline;
/**
* @param options Optional configuration. NOTE(review): the defaults applied when it is omitted are not visible here.
*/
constructor(options?: ChunkingOptions);
/**
* Analyze data characteristics from a sample of a file.
*
* @param filePath File to sample.
* @param sampleSize Optional sample size. NOTE(review): unit and default are not visible here.
* @returns Promise resolving to the DataCharacteristics derived from the sample.
*/
analyzeDataCharacteristics(filePath: string, sampleSize?: number): Promise<DataCharacteristics>;
/**
* Calculate the optimal chunk size based on data characteristics and system state.
*
* @param dataCharacteristics Description of the input data, e.g. as resolved by analyzeDataCharacteristics().
* @param currentSystemMetrics Optional system metrics. NOTE(review): behaviour when omitted is not visible here; the private getCurrentSystemMetrics member is presumably the fallback — confirm.
* @returns The ChunkDecision: chunk size plus reasoning, confidence, adaptation factors and expected performance.
*/
calculateOptimalChunkSize(dataCharacteristics: DataCharacteristics, currentSystemMetrics?: SystemMetrics): ChunkDecision;
/**
* Record actual performance for learning.
*
* @param chunkSize Chunk size the measurements relate to.
* @param dataCharacteristics Characteristics of the data the measurements relate to.
* @param actualPerformance Measured processingTime, memoryUsage, throughput and errorCount (the actualPerformance member type of LearningData).
*/
recordPerformance(chunkSize: number, dataCharacteristics: DataCharacteristics, actualPerformance: LearningData['actualPerformance']): void;
/**
* Calculate data complexity score
*/
private calculateDataComplexity;
/**
* Calculate system performance factor
*/
private calculateSystemPerformanceFactor;
/**
* Calculate learning-based adjustment
*/
private calculateLearningAdjustment;
/**
* Find similar historical cases for learning
*/
private findSimilarCases;
/**
* Calculate data similarity between two data characteristics
*/
private calculateDataSimilarity;
/**
* Calculate system similarity between two system metrics
*/
private calculateSystemSimilarity;
/**
* Calculate satisfaction score from expected vs actual performance
*/
private calculateSatisfaction;
/**
* Helper methods for complexity calculations (the four members that follow).
*/
private calculateTypeComplexity;
private calculateEncodingComplexity;
private calculateStructuralComplexity;
private calculateContentComplexity;
/**
* Utility methods (the ten members that follow).
*/
private getCurrentSystemMetrics;
private detectEncoding;
private estimateColumnCount;
private estimateDataTypes;
private calculateTypeSetSimilarity;
private calculateConfidence;
private predictPerformance;
private initializeLearningModel;
private updateLearningModel;
private formatBytes;
/**
* Get learning statistics.
*
* @returns An object with numeric historySize, averageSatisfaction and
* recentDecisions, a string-keyed map of numbers as learningModel, and a
* number array as confidenceDistribution. NOTE(review): the bucket layout of
* confidenceDistribution is not visible here.
*/
getLearningStats(): {
historySize: number;
averageSatisfaction: number;
recentDecisions: number;
learningModel: {
[k: string]: number;
};
confidenceDistribution: number[];
};
/**
* Reset learning data.
*
* NOTE(review): exactly which internal state is cleared is not visible in
* this declaration file.
*/
resetLearning(): void;
/**
* Export learning data for analysis.
*
* @returns The learning records. NOTE(review): whether this is a copy or the live internal array is not visible here.
*/
exportLearningData(): LearningData[];
/**
* Import learning data.
*
* @param data Learning records to import. NOTE(review): whether they replace or extend existing records is not visible here.
*/
importLearningData(data: LearningData[]): void;
}
/**
* Get or create the global intelligent chunker.
*
* @param options Optional configuration. NOTE(review): whether options are
* honoured when a global instance already exists is not visible in this
* declaration file — confirm against the implementation.
* @returns The global IntelligentChunker instance.
*/
export declare function getGlobalIntelligentChunker(options?: ChunkingOptions): IntelligentChunker;
/**
* Shut down the global intelligent chunker.
*
* NOTE(review): what shutdown involves, and the behaviour when no global
* instance exists, is not visible in this declaration file.
*/
export declare function shutdownGlobalIntelligentChunker(): void;
export {};
//# sourceMappingURL=intelligent-chunker.d.ts.map