UNPKG

datapilot-cli

Version:

Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform

333 lines 12.7 kB
"use strict";
/**
 * Worker Pool Manager
 * Runs CPU-intensive data analysis tasks on a pool of `worker_threads` workers,
 * with a priority queue, per-task timeouts and automatic worker replacement.
 */
Object.defineProperty(exports, "__esModule", { value: true });
exports.WorkerPool = void 0;
exports.getGlobalWorkerPool = getGlobalWorkerPool;
exports.shutdownGlobalWorkerPool = shutdownGlobalWorkerPool;
const worker_threads_1 = require("worker_threads");
const os_1 = require("os");
const events_1 = require("events");
const logger_1 = require("../utils/logger");
const types_1 = require("../core/types");
/**
 * Worker pool for parallel data processing.
 *
 * Workers are created lazily (up to `maxWorkers`) when tasks are queued and no
 * idle worker is available. A worker announces itself by posting a message whose
 * `type` is `'ready'`, and answers each task with a message carrying the
 * task's `taskId`, a `success` flag and either `result` or `error`.
 */
class WorkerPool extends events_1.EventEmitter {
    /** Every worker that has reported ready and has not been removed. */
    workers = new Set();
    /** Ready workers with no task assigned; used as a stack (push/pop). */
    availableWorkers = [];
    /** Worker -> id of the task it is currently running. */
    busyWorkers = new Map();
    /** Waiting tasks ordered high > normal > low, first-in-first-out within a priority. */
    taskQueue = [];
    /** Number of tasks that have been handed to a worker and have not settled yet. */
    activeTasksCount = 0;
    /** Workers reserved by processQueue() whose startup has not finished yet. */
    pendingWorkers = 0;
    maxWorkers;
    idleTimeout;
    taskTimeout;
    enableMemoryMonitoring;
    memoryLimitMB;
    workerScript;
    /**
     * @param {string} workerScript - Script passed to the `Worker` constructor.
     * @param {object} [options]
     * @param {number} [options.maxWorkers] - Defaults to `max(2, cpuCount - 1)`.
     * @param {number} [options.idleTimeout] - Milliseconds, defaults to 30000.
     * @param {number} [options.taskTimeout] - Milliseconds, defaults to 60000.
     * @param {boolean} [options.enableMemoryMonitoring] - Defaults to `true`; forwarded to the worker with each task.
     * @param {number} [options.memoryLimitMB] - Defaults to 256; used for the worker's V8 resource limits.
     */
    constructor(workerScript, options = {}) {
        super();
        this.workerScript = workerScript;
        this.maxWorkers = options.maxWorkers || Math.max(2, (0, os_1.cpus)().length - 1);
        this.idleTimeout = options.idleTimeout || 30000; // 30 seconds
        this.taskTimeout = options.taskTimeout || 60000; // 60 seconds
        this.enableMemoryMonitoring = options.enableMemoryMonitoring ?? true;
        this.memoryLimitMB = options.memoryLimitMB || 256;
        logger_1.logger.info(`Initializing worker pool with ${this.maxWorkers} workers`);
    }
    /**
     * Execute a task in the worker pool.
     *
     * @param {object} task - Needs `id`, `type` and `data`; `priority`
     *   ('high' | 'normal' | 'low') and `timeout` (ms) are optional.
     * @returns {Promise<*>} Resolves with the worker's `result`; rejects when the
     *   worker reports failure, errors, times out, or the pool shuts down while
     *   the task is still queued.
     */
    async execute(task) {
        return new Promise((resolve, reject) => {
            const queuedTask = {
                task,
                resolve,
                reject,
                timestamp: Date.now(),
            };
            // Add to priority queue
            this.addToQueue(queuedTask);
            this.processQueue();
        });
    }
    /**
     * Execute multiple tasks in batches with controlled concurrency.
     *
     * @param {object[]} tasks - Tasks as accepted by `execute`.
     * @param {number} [maxConcurrency] - Batch size; capped at `maxWorkers`.
     * @returns {Promise<Array>} Results in the same order as `tasks`.
     * @throws {DataPilotError} `WORKER_POOL_BATCH_ERROR` if any task failed
     *   (thrown only after every batch has run).
     */
    async executeAll(tasks, maxConcurrency) {
        // Clamp to at least 1: a zero or negative step would never advance the loop below.
        const concurrency = Math.max(1, Math.min(maxConcurrency || this.maxWorkers, this.maxWorkers, tasks.length));
        const results = new Array(tasks.length);
        const errors = [];
        // Process tasks in batches
        for (let i = 0; i < tasks.length; i += concurrency) {
            const batch = tasks.slice(i, i + concurrency);
            const batchPromises = batch.map((task, batchIndex) => this.execute(task)
                .then((result) => {
                results[i + batchIndex] = result;
            })
                .catch((error) => {
                errors.push(new Error(`Task ${i + batchIndex} failed: ${error.message}`));
            }));
            await Promise.all(batchPromises);
        }
        if (errors.length > 0) {
            throw new types_1.DataPilotError(`${errors.length} tasks failed during parallel execution`, 'WORKER_POOL_BATCH_ERROR', types_1.ErrorSeverity.HIGH, types_1.ErrorCategory.PERFORMANCE);
        }
        return results;
    }
    /**
     * Insert a task into the queue according to its priority
     * (high > normal > low; a missing priority counts as 'normal').
     */
    addToQueue(queuedTask) {
        const priority = queuedTask.task.priority || 'normal';
        // Low priority (and anything unrecognised) goes to the back.
        let insertIndex = this.taskQueue.length;
        if (priority === 'high') {
            // Behind the existing high-priority tasks, ahead of everything else.
            insertIndex = this.taskQueue.findIndex((item) => (item.task.priority || 'normal') !== 'high');
            if (insertIndex === -1)
                insertIndex = this.taskQueue.length;
        }
        else if (priority === 'normal') {
            // Ahead of the first low-priority task.
            insertIndex = this.taskQueue.findIndex((item) => (item.task.priority || 'normal') === 'low');
            if (insertIndex === -1)
                insertIndex = this.taskQueue.length;
        }
        this.taskQueue.splice(insertIndex, 0, queuedTask);
    }
    /**
     * Hand queued tasks to idle workers, then start additional workers if tasks
     * remain and the pool is below `maxWorkers`.
     */
    async processQueue() {
        while (this.taskQueue.length > 0 && this.availableWorkers.length > 0) {
            const queuedTask = this.taskQueue.shift();
            const worker = this.availableWorkers.pop();
            this.executeTaskOnWorker(worker, queuedTask);
        }
        // `workers.size` only grows once a worker reports ready, so workers that
        // are still starting are tracked separately. Without this, overlapping
        // calls would each start up to `maxWorkers` workers.
        const unservedTasks = this.taskQueue.length - this.pendingWorkers;
        const headroom = this.maxWorkers - this.workers.size - this.pendingWorkers;
        const workersToCreate = Math.min(unservedTasks, headroom);
        if (workersToCreate <= 0) {
            return;
        }
        // Reserve synchronously so a call made before these workers are ready sees the reservation.
        this.pendingWorkers += workersToCreate;
        const startOneWorker = async () => {
            try {
                await this.createWorker();
                // Process queue again with new worker
                setImmediate(() => this.processQueue());
            }
            catch (error) {
                logger_1.logger.error(`Failed to create worker: ${error.message}`);
            }
            finally {
                this.pendingWorkers--;
            }
        };
        await Promise.all(Array.from({ length: workersToCreate }, startOneWorker));
    }
    /**
     * Run one queued task on the given worker and settle the task's promise
     * from the worker's reply, a worker error, or the task timeout.
     */
    async executeTaskOnWorker(worker, queuedTask) {
        const { task, resolve, reject } = queuedTask;
        const startTime = Date.now();
        const timeoutMs = task.timeout || this.taskTimeout;
        this.busyWorkers.set(worker, task.id);
        this.activeTasksCount++;
        // Stops the timer and removes this task's listeners; called on every exit path.
        const detach = () => {
            clearTimeout(timeout);
            worker.off('message', messageHandler);
            worker.off('error', errorHandler);
        };
        // Returns the worker to the idle list unless it has been removed from the pool meanwhile.
        const releaseWorker = () => {
            this.busyWorkers.delete(worker);
            this.activeTasksCount--;
            if (this.workers.has(worker)) {
                this.availableWorkers.push(worker);
            }
        };
        // Handle worker response
        const messageHandler = (result) => {
            // Ignore anything that is not the reply for this task.
            if (!result || result.taskId !== task.id) {
                return;
            }
            detach();
            releaseWorker();
            const executionTime = Date.now() - startTime;
            if (result.success) {
                logger_1.logger.debug(`Task ${task.id} completed in ${executionTime}ms`);
                resolve(result.result);
            }
            else {
                logger_1.logger.warn(`Task ${task.id} failed: ${result.error}`);
                reject(new Error(result.error));
            }
            // Process next task in queue
            setImmediate(() => this.processQueue());
        };
        const errorHandler = (error) => {
            detach();
            this.busyWorkers.delete(worker);
            this.activeTasksCount--;
            // Remove problematic worker and create a new one
            this.removeWorker(worker);
            this.spawnReplacementWorker();
            reject(new types_1.DataPilotError(`Worker error for task ${task.id}: ${error.message}`, 'WORKER_ERROR', types_1.ErrorSeverity.HIGH, types_1.ErrorCategory.PERFORMANCE));
        };
        // Set up timeout
        const timeout = setTimeout(() => {
            detach();
            this.handleWorkerTimeout(worker, task.id);
            // Report the timeout that was actually applied (a task may override the pool default).
            reject(new Error(`Task ${task.id} timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        worker.on('message', messageHandler);
        worker.on('error', errorHandler);
        // Send task to worker
        try {
            worker.postMessage({
                taskId: task.id,
                type: task.type,
                data: task.data,
                enableMemoryMonitoring: this.enableMemoryMonitoring,
                memoryLimitMB: this.memoryLimitMB,
            });
        }
        catch (error) {
            // postMessage throws synchronously (e.g. data that cannot be cloned).
            // Fail this task right away and keep the worker usable.
            detach();
            releaseWorker();
            reject(new types_1.DataPilotError(`Worker error for task ${task.id}: ${error.message}`, 'WORKER_ERROR', types_1.ErrorSeverity.HIGH, types_1.ErrorCategory.PERFORMANCE));
            setImmediate(() => this.processQueue());
        }
    }
    /**
     * Start a new worker.
     *
     * @returns {Promise<Worker>} Resolves once the worker posts a `'ready'`
     *   message (it is then registered and idle). Rejects if the worker emits
     *   `'error'` or exits before becoming ready.
     */
    async createWorker() {
        return new Promise((resolve, reject) => {
            try {
                const worker = new worker_threads_1.Worker(this.workerScript, {
                    resourceLimits: {
                        maxOldGenerationSizeMb: this.memoryLimitMB,
                        maxYoungGenerationSizeMb: Math.floor(this.memoryLimitMB * 0.3),
                    },
                });
                worker.on('error', (error) => {
                    logger_1.logger.error(`Worker creation error: ${error.message}`);
                    this.removeWorker(worker);
                    reject(error);
                });
                worker.on('exit', (code) => {
                    if (code !== 0) {
                        logger_1.logger.warn(`Worker exited with code ${code}`);
                    }
                    this.removeWorker(worker);
                    // Settles the promise if the worker died before reporting ready;
                    // a no-op once the promise has already been resolved.
                    reject(new Error(`Worker exited with code ${code} before becoming ready`));
                });
                // Worker is ready
                const readyHandler = (message) => {
                    if (message?.type !== 'ready') {
                        return;
                    }
                    // One-shot: a repeated 'ready' must not register the worker twice.
                    worker.off('message', readyHandler);
                    this.workers.add(worker);
                    this.availableWorkers.push(worker);
                    logger_1.logger.debug(`Worker created, pool size: ${this.workers.size}`);
                    resolve(worker);
                };
                worker.on('message', readyHandler);
                // Setup idle timeout
                this.setupWorkerIdleTimeout(worker);
            }
            catch (error) {
                reject(error);
            }
        });
    }
    /**
     * Arm a one-shot idle timer for a worker. When it fires, the worker is
     * removed if it is idle and the pool holds more than two workers. The timer
     * is cancelled the first time a message is posted to the worker.
     */
    setupWorkerIdleTimeout(worker) {
        const timeoutId = setTimeout(() => {
            if (this.availableWorkers.includes(worker) && this.workers.size > 2) {
                this.removeWorker(worker);
                logger_1.logger.debug(`Removed idle worker, pool size: ${this.workers.size}`);
            }
        }, this.idleTimeout);
        // Clear timeout if worker becomes busy: wrap postMessage so dispatching a task cancels the timer.
        const originalPostMessage = worker.postMessage.bind(worker);
        worker.postMessage = (message) => {
            clearTimeout(timeoutId);
            return originalPostMessage(message);
        };
    }
    /**
     * Handle a task timeout: drop the worker that was running the task and
     * start a replacement. Rejecting the task's promise is left to the caller.
     */
    handleWorkerTimeout(worker, taskId) {
        logger_1.logger.warn(`Task ${taskId} timed out, terminating worker`);
        this.busyWorkers.delete(worker);
        this.activeTasksCount--;
        this.removeWorker(worker);
        // Create replacement worker
        this.spawnReplacementWorker();
    }
    /**
     * Start a replacement worker on the next event-loop turn and, once it is
     * ready, hand it queued work. Failures are logged so the promise never
     * surfaces as an unhandled rejection.
     */
    spawnReplacementWorker() {
        setImmediate(() => {
            this.createWorker()
                .then(() => this.processQueue())
                .catch((error) => {
                logger_1.logger.error(`Failed to create worker: ${error.message}`);
            });
        });
    }
    /**
     * Remove a worker from all pool bookkeeping and terminate it.
     */
    removeWorker(worker) {
        this.workers.delete(worker);
        this.availableWorkers = this.availableWorkers.filter((w) => w !== worker);
        this.busyWorkers.delete(worker);
        const warnTerminationFailure = (error) => {
            logger_1.logger.warn(`Error terminating worker: ${error.message}`);
        };
        try {
            // terminate() reports failure through its promise, which a plain try/catch would miss.
            Promise.resolve(worker.terminate()).catch(warnTerminationFailure);
        }
        catch (error) {
            warnTerminationFailure(error);
        }
    }
    /**
     * Get pool statistics.
     *
     * @returns {{totalWorkers: number, availableWorkers: number, busyWorkers: number,
     *   queuedTasks: number, activeTasksCount: number, maxWorkers: number}}
     */
    getStats() {
        return {
            totalWorkers: this.workers.size,
            availableWorkers: this.availableWorkers.length,
            busyWorkers: this.busyWorkers.size,
            queuedTasks: this.taskQueue.length,
            activeTasksCount: this.activeTasksCount,
            maxWorkers: this.maxWorkers,
        };
    }
    /**
     * Shut the pool down: wait up to 30 seconds for active tasks, terminate all
     * workers, and reject every task that is still queued.
     */
    async shutdown() {
        logger_1.logger.info(`Shutting down worker pool with ${this.workers.size} workers`);
        // Wait for active tasks to complete (with timeout)
        const maxWaitTime = 30000; // 30 seconds
        const startTime = Date.now();
        while (this.activeTasksCount > 0 && Date.now() - startTime < maxWaitTime) {
            await new Promise((resolve) => setTimeout(resolve, 100));
        }
        // Terminate all workers; a failed termination must not abort the shutdown.
        await Promise.allSettled(Array.from(this.workers, (worker) => worker.terminate()));
        const abandonedTasks = this.taskQueue;
        this.workers.clear();
        this.availableWorkers = [];
        this.busyWorkers.clear();
        this.taskQueue = [];
        // Settle promises of tasks that never ran; otherwise their callers would wait forever.
        for (const { task, reject } of abandonedTasks) {
            reject(new types_1.DataPilotError(`Task ${task.id} was cancelled because the worker pool shut down`, 'WORKER_POOL_SHUTDOWN', types_1.ErrorSeverity.HIGH, types_1.ErrorCategory.PERFORMANCE));
        }
        logger_1.logger.info('Worker pool shutdown complete');
    }
    /**
     * Check if we're running in the main thread.
     *
     * @returns {boolean} The `isMainThread` value from `worker_threads`.
     */
    static isMainThread() {
        return worker_threads_1.isMainThread;
    }
}
exports.WorkerPool = WorkerPool;
/**
 * Global worker pool instance (created on first use, cleared on shutdown).
 */
let globalWorkerPool = null;
/**
 * Get or create the global worker pool.
 *
 * @param {string} [workerScript] - Required on the first call; ignored once a pool exists.
 * @param {object} [options] - Passed to the `WorkerPool` constructor when the pool is created.
 * @returns {WorkerPool}
 * @throws {DataPilotError} `WORKER_POOL_NOT_INITIALIZED` when no pool exists
 *   and no `workerScript` was given.
 */
function getGlobalWorkerPool(workerScript, options) {
    if (!globalWorkerPool && workerScript) {
        globalWorkerPool = new WorkerPool(workerScript, options);
    }
    if (!globalWorkerPool) {
        throw new types_1.DataPilotError('Worker pool not initialized. Call with workerScript parameter first.', 'WORKER_POOL_NOT_INITIALIZED', types_1.ErrorSeverity.HIGH, types_1.ErrorCategory.PERFORMANCE);
    }
    return globalWorkerPool;
}
/**
 * Shutdown the global worker pool, if one exists, and forget it so a later
 * `getGlobalWorkerPool(workerScript)` call creates a fresh pool.
 */
async function shutdownGlobalWorkerPool() {
    if (globalWorkerPool) {
        await globalWorkerPool.shutdown();
        globalWorkerPool = null;
    }
}
//# sourceMappingURL=worker-pool.js.map