datapilot-cli
Version:
Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform
98 lines • 3.25 kB
TypeScript
/**
* Parallel Analysis Engine
* Orchestrates parallel processing for multi-format data analysis
*/
import type { WorkerTask } from './worker-pool';
/**
 * Optional tuning settings accepted by the ParallelAnalyzer constructor and by
 * getGlobalParallelAnalyzer. Every member may be omitted.
 *
 * NOTE(review): default values and exact semantics are decided by the
 * implementation, which is not part of this declaration — confirm there.
 */
export interface ParallelAnalysisOptions {
  // --- worker / scheduling settings ---

  /** Upper bound on the number of workers (presumably per pool — confirm). */
  maxWorkers?: number;

  /** Number of items grouped into one unit of work (assumed — confirm). */
  batchSize?: number;

  /** Per-task time limit; unit is presumably milliseconds — TODO confirm. */
  taskTimeout?: number;

  // --- memory settings ---

  /** Toggles memory monitoring. */
  enableMemoryMonitoring?: boolean;

  /** Memory ceiling, in megabytes per the field name. */
  memoryLimitMB?: number;
}
/**
 * Aggregate outcome returned by the batch methods of ParallelAnalyzer.
 *
 * NOTE(review): how `success` relates to `failedTasks`, and the element shape
 * of `results`, are defined by the implementation — not visible here.
 */
export interface AnalysisResult {
  /** Overall success flag for the batch. */
  success: boolean;

  /** Number of tasks that made up the batch. */
  totalTasks: number;

  /** Number of tasks counted as failed. */
  failedTasks: number;

  /**
   * Collected task outputs. Untyped (`any`) in the published API; callers
   * should validate entries before relying on their shape.
   */
  results: any[];

  /** Elapsed time for the batch; unit is presumably milliseconds — TODO confirm. */
  executionTime: number;

  /** Optional memory figure; unit is not stated in this declaration — confirm. */
  memoryUsage?: number;
}
/**
* High-performance parallel analysis engine.
*
* Ambient declaration only: the implementation lives in the compiled module
* this file accompanies. The class declares two private worker pools (named
* for statistical and parsing work) plus a private options member. All batch
* methods are asynchronous and resolve to an AnalysisResult.
*/
export declare class ParallelAnalyzer {
/** Worker pool named for statistical tasks (member type elided by the declaration emitter). */
private statisticalWorkerPool;
/** Worker pool named for parsing tasks (member type elided by the declaration emitter). */
private parsingWorkerPool;
/** Stored analyzer options (member type elided by the declaration emitter). */
private options;
/**
* Create an analyzer.
*
* @param options Optional tuning settings; may be omitted entirely.
* NOTE(review): defaults applied when fields are missing are not visible here.
*/
constructor(options?: ParallelAnalysisOptions);
/**
* Calculate descriptive statistics for multiple columns in parallel
*
* @param datasets One numeric array per column.
* @returns Promise resolving to the aggregated AnalysisResult.
* NOTE(review): the shape of each `results` entry is not visible here — confirm.
*/
calculateMultipleDescriptiveStats(datasets: number[][]): Promise<AnalysisResult>;
/**
* Calculate correlations between multiple column pairs in parallel
*
* @param pairs Column pairs; each pair carries two numeric arrays, `x` and `y`.
* @returns Promise resolving to the aggregated AnalysisResult.
* NOTE(review): presumably `x` and `y` must have equal length — verify against the implementation.
*/
calculateMultipleCorrelations(pairs: Array<{
x: number[];
y: number[];
}>): Promise<AnalysisResult>;
/**
* Detect outliers in multiple columns in parallel
*
* @param datasets One numeric array per column.
* @param multiplier Optional numeric factor for the outlier rule.
* NOTE(review): looks like an IQR-style multiplier; the default is not visible here — confirm.
* @returns Promise resolving to the aggregated AnalysisResult.
*/
detectMultipleOutliers(datasets: number[][], multiplier?: number): Promise<AnalysisResult>;
/**
* Calculate frequency distributions for multiple categorical columns in parallel
*
* @param datasets One array of values per column; element type is unconstrained (`any`).
* @returns Promise resolving to the aggregated AnalysisResult.
*/
calculateMultipleFrequencyDistributions(datasets: any[][]): Promise<AnalysisResult>;
/**
* Parse multiple CSV chunks in parallel
*
* @param chunks CSV text chunks, one string per chunk.
* @param options Optional parser settings; untyped (`any`) in this declaration,
* so the accepted keys must be checked against the implementation.
* @returns Promise resolving to the aggregated AnalysisResult.
*/
parseMultipleCSVChunks(chunks: string[], options?: any): Promise<AnalysisResult>;
/**
* Parse multiple JSON objects in parallel
*
* @param jsonStrings JSON documents as strings, one per entry.
* @param options Optional parser settings; untyped (`any`) in this declaration,
* so the accepted keys must be checked against the implementation.
* @returns Promise resolving to the aggregated AnalysisResult.
*/
parseMultipleJSON(jsonStrings: string[], options?: any): Promise<AnalysisResult>;
/**
* Detect data types for multiple columns in parallel
*
* @param columns One array of string values per column.
* @returns Promise resolving to the aggregated AnalysisResult.
*/
detectMultipleDataTypes(columns: string[][]): Promise<AnalysisResult>;
/**
* Execute mixed workload (statistical + parsing) with intelligent scheduling
*
* @param statisticalTasks Tasks of type WorkerTask for the statistical side.
* @param parsingTasks Tasks of type WorkerTask for the parsing side.
* @returns Promise resolving to one AnalysisResult per workload kind,
* under the keys `statistical` and `parsing`.
* NOTE(review): the scheduling policy itself is not visible in this declaration.
*/
executeMixedWorkload(statisticalTasks: WorkerTask[], parsingTasks: WorkerTask[]): Promise<{
statistical: AnalysisResult;
parsing: AnalysisResult;
}>;
/**
* Get performance statistics from both worker pools
*
* @returns An object with the WorkerStats of each pool (`statistical`,
* `parsing`) and a `total` object of numeric worker/task counters.
* NOTE(review): `total` presumably sums the two pools — confirm.
*/
getPerformanceStats(): {
statistical: import("./worker-pool").WorkerStats;
parsing: import("./worker-pool").WorkerStats;
total: {
totalWorkers: number;
availableWorkers: number;
busyWorkers: number;
queuedTasks: number;
activeTasksCount: number;
};
};
/**
* Adaptive batch size calculation based on data size and available workers
*
* @param dataSize Size of the data to process (unit, e.g. row count, not stated here — confirm).
* @param complexity Optional workload complexity hint: 'low', 'medium' or 'high'.
* NOTE(review): the default used when omitted is not visible here.
* @returns The computed batch size. Synchronous, unlike the batch methods.
*/
calculateOptimalBatchSize(dataSize: number, complexity?: 'low' | 'medium' | 'high'): number;
/**
* Gracefully shutdown both worker pools
*
* @returns Promise that resolves once shutdown has completed.
*/
shutdown(): Promise<void>;
}
/**
* Get or create the global parallel analyzer
*
* @param options Optional analyzer settings.
* NOTE(review): presumably only applied when the global instance is first
* created and ignored on later calls — verify against the implementation.
* @returns The global ParallelAnalyzer instance.
*/
export declare function getGlobalParallelAnalyzer(options?: ParallelAnalysisOptions): ParallelAnalyzer;
/**
* Shutdown the global parallel analyzer
*
* @returns Promise that resolves once shutdown has completed.
* NOTE(review): behaviour when no global analyzer exists is not visible in
* this declaration — confirm it is a no-op.
*/
export declare function shutdownGlobalParallelAnalyzer(): Promise<void>;
//# sourceMappingURL=parallel-analyzer.d.ts.map