UNPKG

spawn-workers

Version:

A high-performance worker pool library for Node.js that spawns worker processes to handle tasks in parallel.

323 lines 13 kB
import fs from "fs/promises";
import { ChildProcess, fork } from "node:child_process";
import { createWriteStream, WriteStream } from "node:fs";
import process from "node:process";

/**
 * Manages a pool of forked worker processes.
 *
 * Reads a newline-delimited data file, hands batches of entries to workers
 * over IPC on a fixed tick, collects results/errors back over IPC, and
 * shuts the pool down once every entry has been processed.
 */
export class WorkerManager {
  workers = []; // per-worker record: { process, status, closeRequested }
  config; // required config with defaults applied
  optionalConfig; // file-path options (log / output / failure output)
  logFile; // WriteStream for the log file, if configured
  outputFile; // WriteStream for successful results, if configured
  failureOutputFile; // WriteStream for failure records, if configured
  startedAt = 0; // epoch ms when start() launched the pool
  currentIndex = 0; // index of the next data entry to distribute
  dataEntries = []; // the data file split into lines
  workerIntervalId; // interval handle for the distribution loop
  isShuttingDown = false; // guards against double shutdown

  /**
   * @param {object} config - pool configuration. Required: processCount,
   *   maxConcurrency, dataFilePath, workerFilePath. Optional: tickDuration,
   *   initialIndex, totalEntries, batchSize, maxPendingJobs, env,
   *   onComplete/onStatusUpdate/onError callbacks, logFilePath,
   *   outputFilePath, failureOutputFilePath, overwriteOutputFile.
   * @throws {Error} when processCount or maxConcurrency is not positive.
   */
  constructor(config) {
    // Validate required config before applying defaults.
    this.validateConfig(config);
    this.config = {
      tickDuration: 500,
      initialIndex: 0,
      totalEntries: 0,
      batchSize: 0,
      maxPendingJobs: 0,
      env: {},
      ...config,
      onComplete: config.onComplete ?? (() => {}),
      onStatusUpdate: config.onStatusUpdate ?? (() => {}),
      onError: config.onError ?? (() => {}),
    };
    this.optionalConfig = {
      logFilePath: config.logFilePath,
      outputFilePath: config.outputFilePath,
      failureOutputFilePath: config.failureOutputFilePath,
      // ?? instead of ||: only null/undefined fall back to false.
      overwriteOutputFile: config.overwriteOutputFile ?? false,
    };
  }

  /**
   * Ensures the pool sizing parameters are sane.
   * @throws {Error} on non-positive processCount or maxConcurrency.
   */
  validateConfig(config) {
    if (config.processCount <= 0) {
      throw new Error("processCount must be greater than 0");
    }
    if (config.maxConcurrency <= 0) {
      throw new Error("maxConcurrency must be greater than 0");
    }
  }

  /**
   * Opens `path` for appending, first emptying it if it already has content.
   * Exits the process when the file is non-empty and overwriting is not
   * allowed. `label` prefixes the error message ("Output file", ...).
   * @returns {WriteStream}
   */
  async #openFreshStream(path, label) {
    const existingSize = await fs
      .stat(path)
      .then((stats) => stats.size)
      .catch(() => 0); // missing file counts as empty
    if (existingSize > 0) {
      if (!this.optionalConfig.overwriteOutputFile) {
        console.error(`${label} ${path} already exists and overwriteOutputFile is false`);
        process.exit(1);
      }
      await fs.truncate(path, 0);
    }
    return createWriteStream(path, { flags: "a", encoding: "utf8" });
  }

  /**
   * Loads the data file, derives defaulted sizing parameters, opens the
   * configured log/output streams and installs signal handlers.
   * Exits the process on any initialization failure.
   */
  async initialize() {
    try {
      const dataContent = await fs.readFile(this.config.dataFilePath, "utf8");
      this.dataEntries = dataContent.trim().split("\n");
      // totalEntries defaults to everything after initialIndex.
      if (!this.config.totalEntries) {
        this.config.totalEntries = Math.max(0, this.dataEntries.length - this.config.initialIndex);
      }
      // batchSize defaults to an even split across the workers.
      if (!this.config.batchSize) {
        this.config.batchSize = Math.ceil(this.dataEntries.length / this.config.processCount);
      }
      // maxPendingJobs defaults to two batches in flight per worker.
      if (!this.config.maxPendingJobs) {
        this.config.maxPendingJobs = this.config.batchSize * 2;
      }
      if (this.config.initialIndex >= this.dataEntries.length) {
        throw new Error(`initialIndex (${this.config.initialIndex}) is greater than or equal to data length (${this.dataEntries.length})`);
      }
      this.currentIndex = this.config.initialIndex;
      if (this.optionalConfig.logFilePath) {
        this.logFile = createWriteStream(this.optionalConfig.logFilePath, {
          flags: "a",
          encoding: "utf8",
        });
      }
      if (this.optionalConfig.outputFilePath) {
        this.outputFile = await this.#openFreshStream(this.optionalConfig.outputFilePath, "Output file");
      }
      if (this.optionalConfig.failureOutputFilePath) {
        this.failureOutputFile = await this.#openFreshStream(this.optionalConfig.failureOutputFilePath, "Failure output file");
      }
      process.on("SIGINT", this.handleShutdown.bind(this));
      process.on("SIGTERM", this.handleShutdown.bind(this));
    } catch (error) {
      const errorString = error instanceof Error ? error.message : String(error);
      console.error(`Error initializing worker manager: ${errorString}`);
      process.exit(1);
    }
  }

  /** Signal handler: stops the pool once, then exits. */
  handleShutdown() {
    if (this.isShuttingDown) return;
    this.isShuttingDown = true;
    console.log("Shutting down workers...");
    this.cleanup();
    process.exit(0);
  }

  /**
   * Stops the distribution loop, terminates still-running workers and
   * closes every open write stream.
   */
  cleanup() {
    if (this.workerIntervalId) {
      clearInterval(this.workerIntervalId);
    }
    this.workers.forEach(({ process: proc }) => {
      if (!proc.killed) {
        proc.kill("SIGTERM");
      }
    });
    // BUG FIX: previously only the log file was closed; end all streams so
    // buffered output/failure records are flushed as well.
    for (const stream of [this.logFile, this.outputFile, this.failureOutputFile]) {
      if (stream?.writable) {
        stream.end();
      }
    }
  }

  /**
   * Dispatches one IPC message from worker `workerIndex`.
   * Handles "error" (log + callbacks + failure file), "status" (update and
   * broadcast statuses), "completed" (append result) and "close-response".
   */
  handleIpcMessage(message, workerIndex) {
    const workerInfo = this.workers[workerIndex];
    if (!workerInfo) return;
    if (message.type === "error") {
      const error = message.error;
      if (!error) return;
      this.logError(workerIndex, error);
      this.config.onError?.(error, {
        index: workerIndex,
        pid: workerInfo.process.pid,
      });
      if (this.failureOutputFile?.writable) {
        this.failureOutputFile.write(`${error.name}: ${error.message}\n`);
      }
    } else if (message.type === "status") {
      workerInfo.status = message.status;
      const statuses = this.workers.map((w) => w.status);
      this.config.onStatusUpdate(statuses);
    } else if (message.type === "completed") {
      if (this.outputFile?.writable) {
        this.outputFile.write(message.result + "\n");
      }
    } else if (message.type === "close-response") {
      if (this.logFile?.writable) {
        this.logFile.write(`[${new Date().toISOString()}] Worker ${workerIndex} confirmed completion\n`);
      }
    }
  }

  /** Appends a timestamped worker error to the log file, if one is open. */
  logError(workerIndex, error) {
    if (this.logFile?.writable) {
      const logMessage = `[${new Date().toISOString()}][Worker ${workerIndex}] Error: ${error.name} - ${error.message}`;
      this.logFile.write(logMessage + "\n");
    }
  }

  /**
   * Forks `processCount` workers, wires up their stdout/stderr, IPC, error
   * and exit handlers, and invokes onComplete once every worker has exited.
   */
  startChildProcesses() {
    let exitCount = 0;
    console.log(`Starting ${this.config.processCount} worker processes...`);
    const handleExit = () => {
      const duration = Date.now() - this.startedAt;
      if (this.logFile?.writable) {
        this.logFile.write(`[${new Date().toISOString()}] Job completed. Duration: ${duration}ms\n`);
      }
      this.cleanup();
      const statuses = this.workers.map((w) => w.status);
      this.config.onComplete(statuses);
    };
    for (let i = 0; i < this.config.processCount; i++) {
      const childProcess = fork(this.config.workerFilePath, {
        env: {
          ...process.env,
          ...this.config.env,
          MAX_CONCURRENCY: String(this.config.maxConcurrency),
        },
        stdio: ["inherit", "pipe", "pipe", "ipc"],
      });
      const workerInfo = {
        process: childProcess,
        status: {
          custom: {},
          started: 0,
          completed: 0,
          failed: 0,
          pending: 0,
          received: 0,
          inProgress: 0,
        },
        closeRequested: false,
      };
      childProcess.stdout?.on("data", (data) => {
        if (this.logFile?.writable) {
          this.logFile.write(`[Worker ${i}] ${data.toString()}`);
        }
      });
      // BUG FIX: stderr is piped but was never consumed; an unread pipe can
      // fill its buffer and block the worker. Mirror it into the log file,
      // falling back to the parent's stderr.
      childProcess.stderr?.on("data", (data) => {
        if (this.logFile?.writable) {
          this.logFile.write(`[Worker ${i}][stderr] ${data.toString()}`);
        } else {
          process.stderr.write(`[Worker ${i}] ${data.toString()}`);
        }
      });
      childProcess.on("error", (err) => {
        console.error(`Worker ${i} error:`, err.message);
        this.config.onError(
          { name: err.name, message: err.message, stack: err.stack },
          { index: i, pid: childProcess.pid },
        );
      });
      childProcess.on("exit", (code, signal) => {
        if (code !== 0 && signal !== "SIGTERM") {
          console.warn(`Worker ${i} exited with code ${code}, signal ${signal}`);
        }
        exitCount++;
        // Finish the job only once the whole pool has exited.
        if (exitCount === this.config.processCount) {
          handleExit();
        }
      });
      childProcess.on("message", (message) => {
        this.handleIpcMessage(message, i);
      });
      this.workers.push(workerInfo);
    }
  }

  /**
   * Entry point: initializes state, launches the workers and starts the
   * periodic work-distribution loop.
   */
  async start() {
    await this.initialize();
    if (this.logFile?.writable) {
      this.logFile.write(`\n[${new Date().toISOString()}] Workers started\n`);
    }
    this.startedAt = Date.now();
    this.startChildProcesses();
    const initialIndex = this.config.initialIndex;
    // stopAtIndex is the last (inclusive) entry index to process.
    const stopAtIndex = initialIndex + this.config.totalEntries - 1;
    if (this.logFile?.writable) {
      this.logFile.write(`[${new Date().toISOString()}] Processing entries ${initialIndex} to ${stopAtIndex + 1}. Total: ${this.config.totalEntries}\n`);
    }
    this.workerIntervalId = setInterval(() => {
      this.distributeWork(stopAtIndex);
    }, this.config.tickDuration);
  }

  /**
   * One distribution tick: hands at most one batch to each live worker that
   * is below maxPendingJobs; once all entries are handed out, switches to
   * completion checking.
   */
  distributeWork(stopAtIndex) {
    if (this.isShuttingDown) return;
    if (this.currentIndex > stopAtIndex) {
      this.checkForCompletion();
      return;
    }
    this.workers.forEach((workerInfo) => {
      if (workerInfo.process.exitCode !== null) {
        return; // worker already exited
      }
      const workerPending = workerInfo.status.pending;
      if (workerPending >= this.config.maxPendingJobs) {
        return; // worker is saturated
      }
      // Clamp the batch to the remaining entries.
      const remainingEntries = stopAtIndex + 1 - this.currentIndex;
      const batchSize = Math.min(this.config.batchSize, remainingEntries);
      if (batchSize <= 0) return;
      const entriesBatch = this.dataEntries.slice(this.currentIndex, this.currentIndex + batchSize);
      this.currentIndex += batchSize;
      this.sendToWorker(workerInfo.process, {
        type: "entries",
        entries: entriesBatch,
      });
    });
  }

  /**
   * Asks each still-running, idle worker (pending <= 0) to shut down;
   * each worker is asked at most once.
   */
  checkForCompletion() {
    this.workers.forEach((workerInfo, index) => {
      if (workerInfo.process.exitCode !== null) {
        return; // worker already exited
      }
      if (!workerInfo.closeRequested && workerInfo.status.pending <= 0) {
        this.sendToWorker(workerInfo.process, { type: "close-request" });
        workerInfo.closeRequested = true;
        if (this.logFile?.writable) {
          this.logFile.write(`[${new Date().toISOString()}] Sent close request to Worker ${index}\n`);
        }
      }
    });
  }

  /** Sends an IPC message only if the worker channel is still usable. */
  sendToWorker(worker, message) {
    if (worker.connected && !worker.killed) {
      worker.send(message);
    }
  }
}

/**
 * Spawns worker processes to process data entries.
 * @param {object} config - see {@link WorkerManager}.
 */
export async function spawnWorkers(config) {
  const manager = new WorkerManager(config);
  await manager.start();
}