datapilot-cli
Version:
Enterprise-grade, streaming, multi-format data analysis with comprehensive statistical insights and intelligent relationship detection. Supports CSV, JSON, Excel, TSV, and Parquet. Memory-efficient and cross-platform.
120 lines • 3.42 kB
TypeScript
/**
* Parallel Streaming Analyzer
* High-performance streaming analysis with parallel processing capabilities
*/
import { EventEmitter } from 'events';
/**
* Configuration accepted by the ParallelStreamingAnalyzer constructor and by
* createParallelStreamingAnalyzer. Every field is optional.
*
* NOTE(review): this declaration file shows only names and types. Defaults,
* units and exact semantics are defined by the implementation; the per-field
* notes below are inferred from the names and should be confirmed there.
*/
interface ParallelStreamingOptions {
/** Presumably the size of each streamed chunk - TODO confirm whether rows or bytes. */
chunkSize?: number;
/** Memory threshold; the name indicates megabytes - TODO confirm what happens when it is exceeded. */
memoryThresholdMB?: number;
/** Presumably an upper bound on the number of rows analyzed - TODO confirm. */
maxRowsAnalyzed?: number;
/** Names of analyses to run - NOTE(review): the accepted string values are not visible here. */
enabledAnalyses?: string[];
/** Presumably the significance level used by statistical tests - TODO confirm. */
significanceLevel?: number;
/** Presumably a cap on the number of column pairs considered for correlation - TODO confirm. */
maxCorrelationPairs?: number;
/** Presumably toggles multivariate analysis - TODO confirm. */
enableMultivariate?: boolean;
/** Presumably allows the parallel code path to be used at all - TODO confirm. */
enableParallelProcessing?: boolean;
/** Presumably the maximum number of workers - TODO confirm. */
maxWorkers?: number;
/** Presumably the number of items per unit of parallel work - TODO confirm unit. */
batchSize?: number;
/** Presumably the input size above which parallel processing is chosen - TODO confirm unit. */
parallelThreshold?: number;
/** Presumably a per-worker memory cap - TODO confirm unit. */
memoryLimitPerWorker?: number;
}
/**
* Result shape that ParallelStreamingAnalyzer.analyzeFile resolves with.
*
* The analysis sections are typed as `any` / `any[]`, so their element
* structure is not described by this declaration file.
*/
interface ParallelAnalysisResult {
/** Overview section; untyped (`any`). */
overview: any;
/** Descriptive statistics entries; element shape is untyped. */
descriptiveStats: any[];
/** Correlation entries; element shape is untyped. */
correlations: any[];
/** Outlier entries; element shape is untyped. */
outliers: any[];
/** Frequency distribution entries; element shape is untyped. */
frequencyDistributions: any[];
/**
* Numeric performance figures reported alongside the analysis.
* NOTE(review): units (e.g. milliseconds) and the formulas behind
* speedupFactor and memoryEfficiency are not visible here - confirm
* against the implementation.
*/
performanceMetrics: {
totalExecutionTime: number;
parallelExecutionTime: number;
sequentialExecutionTime: number;
speedupFactor: number;
tasksExecuted: number;
memoryEfficiency: number;
};
}
/**
* Enhanced streaming analyzer with parallel processing capabilities.
*
* Extends Node's EventEmitter. The public surface declared here is the
* constructor, analyzeFile, getPerformanceStats and shutdown; all other
* members are private, and their types are omitted from this declaration.
*
* NOTE(review): which events (if any) are emitted is not visible in this
* declaration file - confirm against the implementation.
*/
export declare class ParallelStreamingAnalyzer extends EventEmitter {
private options;
private parallelAnalyzer;
private baseAnalyzer;
private isInitialized;
/**
* Create an analyzer.
*
* @param options Optional configuration; may be omitted entirely.
*/
constructor(options?: ParallelStreamingOptions);
/**
* Analyze file with intelligent parallel/sequential routing
*
* @param filePath Path of the file to analyze.
* @returns A promise resolving to the analysis sections plus performance metrics.
*/
analyzeFile(filePath: string): Promise<ParallelAnalysisResult>;
/**
* Analyze file characteristics to determine processing strategy
*/
private analyzeFileCharacteristics;
/**
* Determine if parallel processing should be used
*/
private shouldUseParallelProcessing;
/**
* Parallel file analysis for large datasets
*/
private analyzeFileParallel;
/**
* Sequential file analysis for smaller datasets
*/
private analyzeFileSequential;
/**
* Parse file in parallel chunks
*/
private parseFileInParallel;
/**
* Calculate statistics in parallel
*/
private calculateParallelStatistics;
/**
* Calculate correlations in parallel
*/
private calculateParallelCorrelations;
/**
* Detect outliers in parallel
*/
private detectParallelOutliers;
/**
* Calculate frequency distributions in parallel
*/
private calculateParallelFrequencies;
/**
* Calculate optimal chunk size for file parsing
*/
private calculateOptimalChunkSize;
/**
* Calculate performance metrics
*/
private calculatePerformanceMetrics;
/**
* Calculate memory efficiency metric
*/
private calculateMemoryEfficiency;
/**
* Get real-time performance statistics
*
* @returns An object with two WorkerStats values (`statistical` and
* `parsing`, typed from the "../../performance" module) and a `total`
* record of worker and task counts.
*/
getPerformanceStats(): {
statistical: import("../../performance").WorkerStats;
parsing: import("../../performance").WorkerStats;
total: {
totalWorkers: number;
availableWorkers: number;
busyWorkers: number;
queuedTasks: number;
activeTasksCount: number;
};
};
/**
* Gracefully shutdown parallel resources
*
* @returns A promise that resolves with no value.
*/
shutdown(): Promise<void>;
}
/**
* Factory function for creating parallel streaming analyzer
*
* NOTE(review): presumably equivalent to calling the ParallelStreamingAnalyzer
* constructor with the same options - confirm against the implementation.
*
* @param options Optional analyzer configuration; may be omitted.
* @returns A ParallelStreamingAnalyzer instance.
*/
export declare function createParallelStreamingAnalyzer(options?: ParallelStreamingOptions): ParallelStreamingAnalyzer;
export {};
//# sourceMappingURL=parallel-streaming-analyzer.d.ts.map