UNPKG

datapilot-cli

Version:

Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform

175 lines 4.99 kB
/**
 * Intelligent chunking engine — public type declarations.
 *
 * Describes an engine that adapts chunk sizes to the characteristics of the
 * data being read and to the observed performance of the system.
 */
import { EventEmitter } from 'events';

/**
 * Optional tuning parameters accepted by the {@link IntelligentChunker}
 * constructor and by {@link getGlobalIntelligentChunker}.
 *
 * Every field is optional. Defaults and units are defined by the
 * implementation and are not visible in this declaration file.
 */
export interface ChunkingOptions {
  baseChunkSize?: number;
  minChunkSize?: number;
  maxChunkSize?: number;
  adaptationSensitivity?: number;
  performanceWindow?: number;
  complexityWeights?: ComplexityWeights;
  enableLearning?: boolean;
  maxLearningHistory?: number;
}

/**
 * Numeric weights, one per complexity / system dimension.
 * Supplied through {@link ChunkingOptions.complexityWeights}.
 */
export interface ComplexityWeights {
  dataTypeComplexity: number;
  encodingComplexity: number;
  structuralComplexity: number;
  contentComplexity: number;
  memoryPressure: number;
  ioPerformance: number;
}

/**
 * Description of a data file, as produced by
 * {@link IntelligentChunker.analyzeDataCharacteristics} and consumed by
 * {@link IntelligentChunker.calculateOptimalChunkSize}.
 */
export interface DataCharacteristics {
  fileSize: number;
  estimatedRows: number;
  averageLineLength: number;
  encoding: string;
  hasQuotedFields: boolean;
  hasEscapedFields: boolean;
  columnCount: number;
  dataTypes: string[];
  nullDensity: number;
  compressionRatio: number;
}

/**
 * Snapshot of system-level measurements that can be passed to
 * {@link IntelligentChunker.calculateOptimalChunkSize}.
 */
export interface SystemMetrics {
  memoryPressure: number;
  cpuUsage: number;
  ioLatency: number;
  throughput: number;
  errorRate: number;
}

/**
 * Result of a chunk-size calculation: the chosen size together with the
 * reasoning, a confidence value, the individual adaptation factors and the
 * performance that is expected for that size.
 */
export interface ChunkDecision {
  chunkSize: number;
  reasoning: string[];
  confidence: number;
  adaptationFactors: {
    dataComplexity: number;
    systemPerformance: number;
    memoryConstraint: number;
    learningAdjustment: number;
  };
  expectedPerformance: {
    processingTime: number;
    memoryUsage: number;
    throughput: number;
  };
}

/**
 * One learning record: a chunk size, the data and system context it was used
 * in, the performance that was actually observed, and a satisfaction score.
 *
 * Not exported by name, but it appears in the public signatures of
 * `recordPerformance`, `exportLearningData` and `importLearningData`.
 */
interface LearningData {
  chunkSize: number;
  dataCharacteristics: DataCharacteristics;
  systemMetrics: SystemMetrics;
  actualPerformance: {
    processingTime: number;
    memoryUsage: number;
    throughput: number;
    errorCount: number;
  };
  satisfaction: number;
}

/**
 * Chunk-size adaptation engine with a learning component.
 *
 * Extends `EventEmitter`; the names of any emitted events are not visible in
 * this declaration file.
 */
export declare class IntelligentChunker extends EventEmitter {
  private options;
  private performanceHistory;
  private recentDecisions;
  private learningModel;
  private systemBaseline;

  /**
   * @param options Optional tuning parameters.
   */
  constructor(options?: ChunkingOptions);

  /**
   * Derives the characteristics of a data file from a sample of it.
   *
   * @param filePath Path of the file to analyze.
   * @param sampleSize Optional size of the sample (unit not specified here).
   * @returns A promise for the detected characteristics.
   */
  analyzeDataCharacteristics(
    filePath: string,
    sampleSize?: number,
  ): Promise<DataCharacteristics>;

  /**
   * Computes the optimal chunk size for the given data characteristics and
   * system state.
   *
   * @param dataCharacteristics Characteristics of the data to be chunked.
   * @param currentSystemMetrics Optional system metrics to take into account.
   * @returns The decision, including the chosen chunk size.
   */
  calculateOptimalChunkSize(
    dataCharacteristics: DataCharacteristics,
    currentSystemMetrics?: SystemMetrics,
  ): ChunkDecision;

  /**
   * Records the performance that was actually observed, for learning.
   *
   * @param chunkSize Chunk size that was used.
   * @param dataCharacteristics Characteristics of the processed data.
   * @param actualPerformance Measured performance for that chunk size.
   */
  recordPerformance(
    chunkSize: number,
    dataCharacteristics: DataCharacteristics,
    actualPerformance: LearningData['actualPerformance'],
  ): void;

  /** Computes the data complexity score. */
  private calculateDataComplexity;

  /** Computes the system performance factor. */
  private calculateSystemPerformanceFactor;

  /** Computes the learning-based adjustment. */
  private calculateLearningAdjustment;

  /** Finds similar historical cases to learn from. */
  private findSimilarCases;

  /** Computes the similarity between two sets of data characteristics. */
  private calculateDataSimilarity;

  /** Computes the similarity between two sets of system metrics. */
  private calculateSystemSimilarity;

  /** Computes a satisfaction score from expected versus actual performance. */
  private calculateSatisfaction;

  /* Helpers for the individual complexity calculations. */
  private calculateTypeComplexity;
  private calculateEncodingComplexity;
  private calculateStructuralComplexity;
  private calculateContentComplexity;

  /* General utilities. */
  private getCurrentSystemMetrics;
  private detectEncoding;
  private estimateColumnCount;
  private estimateDataTypes;
  private calculateTypeSetSimilarity;
  private calculateConfidence;
  private predictPerformance;
  private initializeLearningModel;
  private updateLearningModel;
  private formatBytes;

  /**
   * Reports statistics about the learning state.
   *
   * @returns History size, average satisfaction, number of recent decisions,
   * the learning model as a string-keyed map of numbers, and a confidence
   * distribution.
   */
  getLearningStats(): {
    historySize: number;
    averageSatisfaction: number;
    recentDecisions: number;
    learningModel: Record<string, number>;
    confidenceDistribution: number[];
  };

  /** Resets the learning data. */
  resetLearning(): void;

  /**
   * Exports the learning data for analysis.
   *
   * @returns The learning records.
   */
  exportLearningData(): LearningData[];

  /**
   * Imports learning data.
   *
   * @param data Learning records to import.
   */
  importLearningData(data: LearningData[]): void;
}

/**
 * Returns the global intelligent chunker, creating it if necessary.
 *
 * @param options Optional tuning parameters.
 */
export declare function getGlobalIntelligentChunker(
  options?: ChunkingOptions,
): IntelligentChunker;

/** Shuts down the global intelligent chunker. */
export declare function shutdownGlobalIntelligentChunker(): void;

export {};
//# sourceMappingURL=intelligent-chunker.d.ts.map