datapilot-cli
Version:
Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform
333 lines • 12.7 kB
JavaScript
;
/**
* High-Performance Worker Pool Manager
* Provides thread-safe parallel processing for CPU-intensive data analysis operations
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.WorkerPool = void 0;
exports.getGlobalWorkerPool = getGlobalWorkerPool;
exports.shutdownGlobalWorkerPool = shutdownGlobalWorkerPool;
const worker_threads_1 = require("worker_threads");
const os_1 = require("os");
const events_1 = require("events");
const logger_1 = require("../utils/logger");
const types_1 = require("../core/types");
/**
 * High-performance worker pool for parallel data processing.
 *
 * Tasks are queued by priority ('high' > 'normal' > 'low', first-in first-out
 * within one priority) and dispatched to worker threads that are created
 * lazily, never exceeding `maxWorkers`.
 *
 * Protocol expected from the worker script, as used by this class:
 *  - it posts `{ type: 'ready' }` once it can accept work;
 *  - for every `{ taskId, type, data, ... }` message it receives, it answers
 *    with `{ taskId, success, result }` or `{ taskId, success: false, error }`.
 *
 * NOTE(review): a worker script that never posts 'ready' keeps its slot
 * reserved indefinitely; there is no start-up timeout option to bound this.
 */
class WorkerPool extends events_1.EventEmitter {
    /** Workers that reported ready and currently belong to the pool. */
    workers = new Set();
    /** Ready workers that are not running a task (used as a stack). */
    availableWorkers = [];
    /** Worker -> id of the task it is currently running. */
    busyWorkers = new Map();
    /** Waiting tasks, ordered by priority and then by arrival. */
    taskQueue = [];
    /** Number of tasks that were dispatched and have not settled yet. */
    activeTasksCount = 0;
    /** Number of createWorker() calls that have not settled yet. */
    pendingWorkerCount = 0;
    /** Spawned threads that have not posted their 'ready' message yet. */
    startingWorkers = new Set();
    /** Worker -> handle of its currently armed idle timer. */
    idleTimers = new Map();
    /** True only while shutdown() is tearing the pool down. */
    isShuttingDown = false;
    maxWorkers;
    idleTimeout;
    taskTimeout;
    enableMemoryMonitoring;
    memoryLimitMB;
    workerScript;
    /**
     * @param {string} workerScript - Script handed to the `Worker` constructor.
     * @param {object} [options]
     * @param {number} [options.maxWorkers] - Upper bound on pool size (default: CPU count - 1, at least 2).
     * @param {number} [options.idleTimeout] - Milliseconds an idle worker is kept (default 30000).
     * @param {number} [options.taskTimeout] - Default per-task timeout in milliseconds (default 60000).
     * @param {boolean} [options.enableMemoryMonitoring] - Forwarded to the worker with each task (default true).
     * @param {number} [options.memoryLimitMB] - Old-generation heap limit per worker (default 256).
     */
    constructor(workerScript, options = {}) {
        super();
        this.workerScript = workerScript;
        this.maxWorkers = options.maxWorkers || Math.max(2, (0, os_1.cpus)().length - 1);
        this.idleTimeout = options.idleTimeout || 30000; // 30 seconds
        this.taskTimeout = options.taskTimeout || 60000; // 60 seconds
        this.enableMemoryMonitoring = options.enableMemoryMonitoring ?? true;
        this.memoryLimitMB = options.memoryLimitMB || 256;
        logger_1.logger.info(`Initializing worker pool with ${this.maxWorkers} workers`);
    }
    /**
     * Execute a task in the worker pool.
     *
     * @param {object} task - `{ id, type, data, priority?, timeout? }`.
     * @returns {Promise<*>} Resolves with the worker's `result`; rejects when the
     *   worker reports failure, crashes, exits, times out, or the pool is torn down.
     */
    execute(task) {
        return new Promise((resolve, reject) => {
            if (this.isShuttingDown) {
                // Workers are being terminated right now; queuing would strand the task.
                reject(this.createPoolError('Worker pool is shutting down', 'WORKER_POOL_SHUTDOWN'));
                return;
            }
            this.addToQueue({
                task,
                resolve,
                reject,
                timestamp: Date.now(),
            });
            this.processQueue();
        });
    }
    /**
     * Execute multiple tasks, running them in consecutive batches of at most
     * `maxConcurrency` tasks (capped by `maxWorkers`).
     *
     * @param {object[]} tasks - Tasks as accepted by `execute`.
     * @param {number} [maxConcurrency] - Batch size; values that are missing,
     *   non-finite or below 1 fall back to `maxWorkers`, fractions are floored.
     * @returns {Promise<Array>} Results in the same order as `tasks`.
     * @throws {DataPilotError} `WORKER_POOL_BATCH_ERROR` if any task failed; the
     *   individual failures are attached as `taskErrors`.
     */
    async executeAll(tasks, maxConcurrency) {
        const requested = Number.isFinite(maxConcurrency) && maxConcurrency >= 1 ? maxConcurrency : this.maxWorkers;
        // Clamp to a positive integer: a zero or negative step would never
        // advance the loop, and a fractional one would mis-index the results.
        const concurrency = Math.max(1, Math.floor(Math.min(requested, this.maxWorkers, tasks.length)));
        const results = new Array(tasks.length);
        const errors = [];
        for (let start = 0; start < tasks.length; start += concurrency) {
            const batch = tasks.slice(start, start + concurrency);
            await Promise.all(batch.map(async (task, offset) => {
                const index = start + offset;
                try {
                    results[index] = await this.execute(task);
                }
                catch (error) {
                    const reason = error instanceof Error ? error.message : String(error);
                    errors.push(new Error(`Task ${index} failed: ${reason}`, { cause: error }));
                }
            }));
        }
        if (errors.length > 0) {
            const batchError = this.createPoolError(`${errors.length} tasks failed during parallel execution`, 'WORKER_POOL_BATCH_ERROR');
            // Keep the per-task details reachable instead of discarding them.
            batchError.taskErrors = errors;
            throw batchError;
        }
        return results;
    }
    /**
     * Insert a queued task according to its priority (high > normal > low).
     * Tasks of equal priority keep their arrival order; a missing priority
     * counts as 'normal' and any other value is appended at the end.
     */
    addToQueue(queuedTask) {
        const priorityOf = (entry) => entry.task.priority || 'normal';
        const priority = priorityOf(queuedTask);
        let insertIndex = -1;
        if (priority === 'high') {
            insertIndex = this.taskQueue.findIndex((entry) => priorityOf(entry) !== 'high');
        }
        else if (priority === 'normal') {
            insertIndex = this.taskQueue.findIndex((entry) => priorityOf(entry) === 'low');
        }
        if (insertIndex === -1) {
            insertIndex = this.taskQueue.length;
        }
        this.taskQueue.splice(insertIndex, 0, queuedTask);
    }
    /**
     * Hand queued tasks to idle workers and start additional workers while
     * tasks remain and the pool is below `maxWorkers`.
     *
     * @returns {Promise<void>} Settles once the workers started by this call
     *   have either become ready or failed to start. Never rejects.
     */
    async processQueue() {
        if (this.isShuttingDown) {
            return;
        }
        while (this.taskQueue.length > 0 && this.availableWorkers.length > 0) {
            const queuedTask = this.taskQueue.shift();
            const worker = this.availableWorkers.pop();
            this.executeTaskOnWorker(worker, queuedTask);
        }
        // Workers that are still starting already cover part of the demand and
        // already occupy pool slots; counting them keeps the pool within
        // maxWorkers even when many execute() calls arrive back to back.
        const unservedTasks = this.taskQueue.length - this.pendingWorkerCount;
        const freeSlots = this.maxWorkers - this.workers.size - this.pendingWorkerCount;
        const workersToCreate = Math.min(unservedTasks, freeSlots);
        const startups = [];
        for (let i = 0; i < workersToCreate; i++) {
            startups.push(this.spawnWorker());
        }
        await Promise.all(startups);
    }
    /**
     * Start one worker while accounting for it as pending. On success the queue
     * is processed again; on failure the error is logged and, when no worker is
     * left or starting, every queued task is rejected instead of waiting forever.
     *
     * @returns {Promise<void>} Never rejects.
     */
    async spawnWorker() {
        this.pendingWorkerCount++;
        let failure = null;
        let failed = false;
        try {
            await this.createWorker();
        }
        catch (error) {
            failed = true;
            failure = error;
        }
        this.pendingWorkerCount--;
        if (!failed) {
            setImmediate(() => this.processQueue());
            return;
        }
        const reason = failure instanceof Error ? failure.message : String(failure);
        logger_1.logger.error(`Failed to create worker: ${reason}`);
        const poolIsEmpty = this.workers.size === 0 && this.pendingWorkerCount === 0;
        if (poolIsEmpty && !this.isShuttingDown && this.taskQueue.length > 0) {
            this.rejectQueuedTasks(this.createPoolError(`No worker could be started: ${reason}`, 'WORKER_CREATION_ERROR'));
        }
    }
    /**
     * Run one queued task on the given worker and settle its promise exactly
     * once, whichever happens first: a matching reply, a worker error, a worker
     * exit, the timeout, or a failure to post the task.
     *
     * @returns {Promise<void>} Never rejects; outcomes go to the task's promise.
     */
    async executeTaskOnWorker(worker, queuedTask) {
        const { task, resolve, reject } = queuedTask;
        const startTime = Date.now();
        const timeoutMs = task.timeout || this.taskTimeout;
        this.clearWorkerIdleTimeout(worker);
        this.busyWorkers.set(worker, task.id);
        this.activeTasksCount++;
        let settled = false;
        let timeout;
        // Returns true for the first caller only, so a late event after a
        // timeout or crash cannot settle or account for the task twice.
        const detach = () => {
            if (settled) {
                return false;
            }
            settled = true;
            clearTimeout(timeout);
            worker.off('message', messageHandler);
            worker.off('error', errorHandler);
            worker.off('exit', exitHandler);
            return true;
        };
        const release = () => {
            this.busyWorkers.delete(worker);
            this.activeTasksCount--;
        };
        const messageHandler = (result) => {
            if (!result || result.taskId !== task.id || !detach()) {
                return;
            }
            release();
            this.returnWorkerToPool(worker);
            const executionTime = Date.now() - startTime;
            if (result.success) {
                logger_1.logger.debug(`Task ${task.id} completed in ${executionTime}ms`);
                resolve(result.result);
            }
            else {
                logger_1.logger.warn(`Task ${task.id} failed: ${result.error}`);
                reject(new Error(result.error));
            }
            // Process next task in queue
            setImmediate(() => this.processQueue());
        };
        const errorHandler = (error) => {
            if (!detach()) {
                return;
            }
            release();
            // Remove problematic worker and create a new one
            this.removeWorker(worker);
            this.replaceWorker();
            reject(this.createPoolError(`Worker error for task ${task.id}: ${error.message}`, 'WORKER_ERROR'));
        };
        const exitHandler = (code) => {
            if (!detach()) {
                return;
            }
            release();
            // The thread is gone, so no reply will ever arrive for this task.
            this.removeWorker(worker);
            this.replaceWorker();
            reject(this.createPoolError(`Worker exited with code ${code} while running task ${task.id}`, 'WORKER_ERROR'));
        };
        timeout = setTimeout(() => {
            if (!detach()) {
                return;
            }
            // handleWorkerTimeout performs the busy/active bookkeeping itself.
            this.handleWorkerTimeout(worker, task.id);
            reject(new Error(`Task ${task.id} timed out after ${timeoutMs}ms`));
        }, timeoutMs);
        worker.on('message', messageHandler);
        worker.on('error', errorHandler);
        worker.on('exit', exitHandler);
        try {
            // Send task to worker
            worker.postMessage({
                taskId: task.id,
                type: task.type,
                data: task.data,
                enableMemoryMonitoring: this.enableMemoryMonitoring,
                memoryLimitMB: this.memoryLimitMB,
            });
        }
        catch (error) {
            // postMessage throws synchronously, e.g. for data that cannot be
            // cloned. The worker itself is fine, so recycle it and fail the task.
            if (detach()) {
                release();
                this.returnWorkerToPool(worker);
                reject(this.createPoolError(`Failed to dispatch task ${task.id}: ${error.message}`, 'WORKER_ERROR'));
                setImmediate(() => this.processQueue());
            }
        }
    }
    /**
     * Create a new worker and register it with the pool once it reports ready.
     *
     * @returns {Promise<Worker>} Resolves with the ready worker; rejects if the
     *   worker cannot be constructed, errors, or exits before becoming ready.
     */
    createWorker() {
        return new Promise((resolve, reject) => {
            let worker;
            try {
                worker = new worker_threads_1.Worker(this.workerScript, {
                    resourceLimits: {
                        maxOldGenerationSizeMb: this.memoryLimitMB,
                        maxYoungGenerationSizeMb: Math.floor(this.memoryLimitMB * 0.3),
                    },
                });
            }
            catch (error) {
                reject(error);
                return;
            }
            this.startingWorkers.add(worker);
            let isReady = false;
            const readyHandler = (message) => {
                if (!message || message.type !== 'ready') {
                    return;
                }
                // One-shot: a repeated 'ready' must not register the worker twice.
                worker.off('message', readyHandler);
                this.startingWorkers.delete(worker);
                if (this.isShuttingDown) {
                    this.removeWorker(worker);
                    reject(new Error('Worker became ready while the pool was shutting down'));
                    return;
                }
                isReady = true;
                this.workers.add(worker);
                this.availableWorkers.push(worker);
                this.setupWorkerIdleTimeout(worker);
                logger_1.logger.debug(`Worker created, pool size: ${this.workers.size}`);
                resolve(worker);
            };
            worker.on('message', readyHandler);
            worker.on('error', (error) => {
                this.startingWorkers.delete(worker);
                logger_1.logger.error(`${isReady ? 'Worker error' : 'Worker creation error'}: ${error.message}`);
                this.removeWorker(worker);
                reject(error); // no effect once the worker was handed out
            });
            worker.on('exit', (code) => {
                this.startingWorkers.delete(worker);
                if (code !== 0) {
                    logger_1.logger.warn(`Worker exited with code ${code}`);
                }
                this.removeWorker(worker);
                reject(new Error(`Worker exited with code ${code} before becoming ready`)); // no effect once ready
            });
        });
    }
    /**
     * (Re)arm the idle timer of a worker that has just become available. When
     * the timer fires and the worker is still idle, it is removed as long as
     * more than two workers remain in the pool.
     */
    setupWorkerIdleTimeout(worker) {
        this.clearWorkerIdleTimeout(worker);
        const timeoutId = setTimeout(() => {
            this.idleTimers.delete(worker);
            if (this.availableWorkers.includes(worker) && this.workers.size > 2) {
                this.removeWorker(worker);
                logger_1.logger.debug(`Removed idle worker, pool size: ${this.workers.size}`);
            }
        }, this.idleTimeout);
        this.idleTimers.set(worker, timeoutId);
    }
    /**
     * Cancel the idle timer of a worker, if one is armed.
     */
    clearWorkerIdleTimeout(worker) {
        const timeoutId = this.idleTimers.get(worker);
        if (timeoutId !== undefined) {
            clearTimeout(timeoutId);
            this.idleTimers.delete(worker);
        }
    }
    /**
     * Mark a worker as available again and restart its idle countdown.
     */
    returnWorkerToPool(worker) {
        this.availableWorkers.push(worker);
        this.setupWorkerIdleTimeout(worker);
    }
    /**
     * Handle a task timeout: drop the task's bookkeeping, terminate the worker
     * that was running it and schedule a replacement.
     */
    handleWorkerTimeout(worker, taskId) {
        logger_1.logger.warn(`Task ${taskId} timed out, terminating worker`);
        this.busyWorkers.delete(worker);
        this.activeTasksCount--;
        this.removeWorker(worker);
        // Create replacement worker
        this.replaceWorker();
    }
    /**
     * Schedule a replacement for a worker that was removed after a failure.
     * Nothing is started while shutting down or when the pool is already full.
     */
    replaceWorker() {
        setImmediate(() => {
            const poolIsFull = this.workers.size + this.pendingWorkerCount >= this.maxWorkers;
            if (this.isShuttingDown || poolIsFull) {
                return;
            }
            // spawnWorker never rejects and re-runs the queue once the worker is ready.
            this.spawnWorker();
        });
    }
    /**
     * Remove a worker from all pool bookkeeping and terminate its thread.
     */
    removeWorker(worker) {
        this.clearWorkerIdleTimeout(worker);
        this.workers.delete(worker);
        this.availableWorkers = this.availableWorkers.filter((w) => w !== worker);
        this.busyWorkers.delete(worker);
        const warnTerminationFailure = (error) => {
            logger_1.logger.warn(`Error terminating worker: ${error.message}`);
        };
        try {
            // terminate() is asynchronous, so a rejection has to be caught on
            // the returned promise; the try/catch only covers synchronous throws.
            Promise.resolve(worker.terminate()).catch(warnTerminationFailure);
        }
        catch (error) {
            warnTerminationFailure(error);
        }
    }
    /**
     * Reject every task that is still waiting in the queue and empty the queue.
     */
    rejectQueuedTasks(error) {
        const abandoned = this.taskQueue;
        this.taskQueue = [];
        for (const queuedTask of abandoned) {
            queuedTask.reject(error);
        }
    }
    /**
     * Build a DataPilotError with the severity and category used by this pool.
     */
    createPoolError(message, code) {
        return new types_1.DataPilotError(message, code, types_1.ErrorSeverity.HIGH, types_1.ErrorCategory.PERFORMANCE);
    }
    /**
     * Get pool statistics
     */
    getStats() {
        return {
            totalWorkers: this.workers.size,
            availableWorkers: this.availableWorkers.length,
            busyWorkers: this.busyWorkers.size,
            queuedTasks: this.taskQueue.length,
            activeTasksCount: this.activeTasksCount,
            maxWorkers: this.maxWorkers,
        };
    }
    /**
     * Gracefully shut down the worker pool.
     *
     * Waits up to 30 seconds for active tasks (the queue keeps draining during
     * that time), then rejects whatever is still queued, terminates every
     * worker - including ones that are still starting - and resets the pool.
     * Tasks that are still running when their worker is terminated are rejected
     * through the worker's 'exit' event.
     */
    async shutdown() {
        logger_1.logger.info(`Shutting down worker pool with ${this.workers.size} workers`);
        // Wait for active tasks to complete (with timeout)
        const maxWaitTime = 30000; // 30 seconds
        const startTime = Date.now();
        while (this.activeTasksCount > 0 && Date.now() - startTime < maxWaitTime) {
            await new Promise((resolve) => setTimeout(resolve, 100));
        }
        this.isShuttingDown = true;
        try {
            // Settle queued tasks instead of silently dropping them; their
            // callers would otherwise wait on promises that never settle.
            this.rejectQueuedTasks(this.createPoolError('Worker pool was shut down before the task could run', 'WORKER_POOL_SHUTDOWN'));
            const doomedWorkers = [...this.workers, ...this.startingWorkers];
            await Promise.all(doomedWorkers.map(async (worker) => {
                try {
                    await worker.terminate();
                }
                catch (error) {
                    logger_1.logger.warn(`Error terminating worker: ${error.message}`);
                }
            }));
            for (const timeoutId of this.idleTimers.values()) {
                clearTimeout(timeoutId);
            }
            this.idleTimers.clear();
            this.workers.clear();
            this.startingWorkers.clear();
            this.availableWorkers = [];
            this.busyWorkers.clear();
            this.taskQueue = [];
            logger_1.logger.info('Worker pool shutdown complete');
        }
        finally {
            // The instance stays usable after a shutdown, as before.
            this.isShuttingDown = false;
        }
    }
    /**
     * Check if we're running in the main thread
     */
    static isMainThread() {
        return worker_threads_1.isMainThread;
    }
}
exports.WorkerPool = WorkerPool;
/**
 * Process-wide worker pool shared by callers of getGlobalWorkerPool().
 * Stays null until the first call that supplies a worker script.
 */
let globalWorkerPool = null;
/**
 * Return the shared worker pool, creating it on first use.
 *
 * An existing pool is returned unchanged, regardless of the arguments passed.
 *
 * @param {string} [workerScript] - Worker script used to create the pool when none exists yet.
 * @param {object} [options] - Options passed to the WorkerPool constructor on creation.
 * @returns {WorkerPool} The shared pool instance.
 * @throws {DataPilotError} `WORKER_POOL_NOT_INITIALIZED` when no pool exists and no script was given.
 */
function getGlobalWorkerPool(workerScript, options) {
    if (globalWorkerPool) {
        return globalWorkerPool;
    }
    if (!workerScript) {
        throw new types_1.DataPilotError('Worker pool not initialized. Call with workerScript parameter first.', 'WORKER_POOL_NOT_INITIALIZED', types_1.ErrorSeverity.HIGH, types_1.ErrorCategory.PERFORMANCE);
    }
    globalWorkerPool = new WorkerPool(workerScript, options);
    return globalWorkerPool;
}
/**
 * Shut down the global worker pool, if one exists.
 *
 * The shared reference is cleared before the shutdown is awaited, so that
 * - a concurrent call does not shut the same pool down a second time,
 * - getGlobalWorkerPool() never hands out a pool that is being torn down, and
 * - a failing shutdown does not leave the dead pool registered.
 *
 * @returns {Promise<void>} Settles with the outcome of the pool's shutdown();
 *   resolves immediately when there is no pool.
 */
async function shutdownGlobalWorkerPool() {
    const pool = globalWorkerPool;
    if (!pool) {
        return;
    }
    globalWorkerPool = null;
    await pool.shutdown();
}
//# sourceMappingURL=worker-pool.js.map