datapilot-cli

Version:

Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection. Supports CSV, JSON, Excel, TSV, and Parquet; memory-efficient and cross-platform.

github.com/Mrassimo/datapilot

Mrassimo/datapilot

98 lines • 3.25 kB

TypeScript

View Raw

/**
 * Parallel Analysis Engine
 *
 * Type declarations for the component that orchestrates parallel processing
 * for multi-format data analysis. This file carries signatures only; the
 * behavior lives in the accompanying implementation.
 */
import type { WorkerTask } from './worker-pool';

/**
 * Optional settings accepted by {@link ParallelAnalyzer}.
 *
 * Every field may be omitted. Defaults are chosen by the implementation and
 * are not visible in this declaration file.
 * NOTE(review): the per-field notes below are read off the field names —
 * confirm against the implementation.
 */
export interface ParallelAnalysisOptions {
  /** Presumably the cap on the number of workers. */
  maxWorkers?: number;
  /** Presumably switches memory monitoring on or off. */
  enableMemoryMonitoring?: boolean;
  /** Presumably a memory ceiling, in megabytes going by the name. */
  memoryLimitMB?: number;
  /** Presumably the number of items handed out per batch. */
  batchSize?: number;
  /** Presumably a per-task timeout; the unit is not stated here — TODO confirm. */
  taskTimeout?: number;
}

/**
 * Aggregate outcome returned by the parallel operations of
 * {@link ParallelAnalyzer}.
 *
 * NOTE(review): field meanings are inferred from their names; this file does
 * not define units (e.g. for `executionTime` or `memoryUsage`) — confirm
 * against the implementation.
 */
export interface AnalysisResult {
  success: boolean;
  /** Untyped payloads; their shape depends on the operation that produced them. */
  results: any[];
  executionTime: number;
  totalTasks: number;
  failedTasks: number;
  memoryUsage?: number;
}

/**
 * High-performance parallel analysis engine.
 *
 * Declares two private worker pools (statistical and parsing) plus the
 * options it was configured with.
 */
export declare class ParallelAnalyzer {
  private statisticalWorkerPool;
  private parsingWorkerPool;
  private options;

  /**
   * @param options Optional configuration; see {@link ParallelAnalysisOptions}.
   */
  constructor(options?: ParallelAnalysisOptions);

  /**
   * Computes descriptive statistics for several columns in parallel.
   *
   * @param datasets One numeric array per column.
   * @returns The aggregated result of all column computations.
   */
  calculateMultipleDescriptiveStats(datasets: number[][]): Promise<AnalysisResult>;

  /**
   * Computes correlations for several column pairs in parallel.
   *
   * @param pairs The `x`/`y` series of each pair to correlate.
   * @returns The aggregated result of all pair computations.
   */
  calculateMultipleCorrelations(pairs: { x: number[]; y: number[] }[]): Promise<AnalysisResult>;

  /**
   * Runs outlier detection over several columns in parallel.
   *
   * @param datasets One numeric array per column.
   * @param multiplier Optional detection factor. NOTE(review): its exact
   *   meaning and default are defined by the implementation — confirm.
   * @returns The aggregated result of all column computations.
   */
  detectMultipleOutliers(datasets: number[][], multiplier?: number): Promise<AnalysisResult>;

  /**
   * Computes frequency distributions for several categorical columns in
   * parallel.
   *
   * @param datasets One array of values per column.
   * @returns The aggregated result of all column computations.
   */
  calculateMultipleFrequencyDistributions(datasets: any[][]): Promise<AnalysisResult>;

  /**
   * Parses several CSV chunks in parallel.
   *
   * @param chunks The CSV text chunks to parse.
   * @param options Optional parser settings; untyped in this declaration.
   * @returns The aggregated result of all parse tasks.
   */
  parseMultipleCSVChunks(chunks: string[], options?: any): Promise<AnalysisResult>;

  /**
   * Parses several JSON strings in parallel.
   *
   * @param jsonStrings The JSON texts to parse.
   * @param options Optional parser settings; untyped in this declaration.
   * @returns The aggregated result of all parse tasks.
   */
  parseMultipleJSON(jsonStrings: string[], options?: any): Promise<AnalysisResult>;

  /**
   * Detects data types for several columns in parallel.
   *
   * @param columns One array of string values per column.
   * @returns The aggregated result of all detection tasks.
   */
  detectMultipleDataTypes(columns: string[][]): Promise<AnalysisResult>;

  /**
   * Executes a mixed workload of statistical and parsing tasks. How the two
   * task lists are scheduled is decided by the implementation.
   *
   * @param statisticalTasks Tasks belonging to the statistical workload.
   * @param parsingTasks Tasks belonging to the parsing workload.
   * @returns One {@link AnalysisResult} per workload kind.
   */
  executeMixedWorkload(
    statisticalTasks: WorkerTask[],
    parsingTasks: WorkerTask[],
  ): Promise<{
    statistical: AnalysisResult;
    parsing: AnalysisResult;
  }>;

  /**
   * Reports performance statistics from both worker pools.
   *
   * @returns Per-pool stats plus a `total` section.
   *   NOTE(review): `total` presumably combines both pools — confirm.
   */
  getPerformanceStats(): {
    statistical: import('./worker-pool').WorkerStats;
    parsing: import('./worker-pool').WorkerStats;
    total: {
      totalWorkers: number;
      availableWorkers: number;
      busyWorkers: number;
      queuedTasks: number;
      activeTasksCount: number;
    };
  };

  /**
   * Calculates an adaptive batch size from the data size and the available
   * workers.
   *
   * @param dataSize Size of the data to be processed.
   * @param complexity Optional complexity class of the work; the default is
   *   chosen by the implementation.
   * @returns The batch size to use.
   */
  calculateOptimalBatchSize(dataSize: number, complexity?: 'low' | 'medium' | 'high'): number;

  /**
   * Gracefully shuts down both worker pools.
   */
  shutdown(): Promise<void>;
}

/**
 * Returns the global parallel analyzer, creating it when none exists yet.
 *
 * @param options Optional configuration.
 *   NOTE(review): presumably only honored when the instance is created —
 *   confirm against the implementation.
 */
export declare function getGlobalParallelAnalyzer(options?: ParallelAnalysisOptions): ParallelAnalyzer;

/**
 * Shuts down the global parallel analyzer.
 */
export declare function shutdownGlobalParallelAnalyzer(): Promise<void>;
//# sourceMappingURL=parallel-analyzer.d.ts.map