spawn-workers
Version:
A high-performance worker pool library for Node.js that spawns worker processes to handle tasks in parallel
323 lines • 13 kB
JavaScript
import fs from "fs/promises";
import { ChildProcess, fork } from "node:child_process";
import { createWriteStream, WriteStream } from "node:fs";
import process from "node:process";
export class WorkerManager {
  // One record per forked child: { process, status, closeRequested }.
  workers = [];
  // Fully-defaulted runtime config (populated in the constructor).
  config;
  // File-path options kept separate from the runtime config.
  optionalConfig;
  // Write streams; each stays undefined unless its path was configured.
  logFile;
  outputFile;
  failureOutputFile;
  // Epoch-ms timestamp captured when start() launches the pool.
  startedAt = 0;
  // Index of the next data entry to hand out.
  currentIndex = 0;
  // Lines of the data file, one entry per line.
  dataEntries = [];
  // Handle for the periodic work-distribution loop.
  workerIntervalId;
  // Guards against re-entrant shutdown (e.g. repeated SIGINT).
  isShuttingDown = false;

  /**
   * @param {object} config - Pool configuration. Required: processCount,
   *   maxConcurrency, dataFilePath, workerFilePath. Optional: tickDuration,
   *   initialIndex, totalEntries, batchSize, maxPendingJobs, env,
   *   onComplete/onStatusUpdate/onError callbacks, logFilePath,
   *   outputFilePath, failureOutputFilePath, overwriteOutputFile.
   * @throws {Error} If processCount or maxConcurrency is not positive.
   */
  constructor(config) {
    // Validate required config
    this.validateConfig(config);
    // Merge defaults first so explicit config values win, then normalize
    // the callbacks so they are always safe to invoke.
    this.config = {
      tickDuration: 500,
      initialIndex: 0,
      totalEntries: 0,
      batchSize: 0,
      maxPendingJobs: 0,
      env: {},
      ...config,
      onComplete: config.onComplete ?? (() => { }),
      onStatusUpdate: config.onStatusUpdate ?? (() => { }),
      onError: config.onError ?? (() => { }),
    };
    this.optionalConfig = {
      logFilePath: config.logFilePath,
      outputFilePath: config.outputFilePath,
      failureOutputFilePath: config.failureOutputFilePath,
      // ?? instead of ||: only null/undefined fall back to false.
      overwriteOutputFile: config.overwriteOutputFile ?? false,
    };
  }

  /**
   * Rejects configurations that could never make progress.
   * @throws {Error} If processCount or maxConcurrency is <= 0.
   */
  validateConfig(config) {
    if (config.processCount <= 0) {
      throw new Error("processCount must be greater than 0");
    }
    if (config.maxConcurrency <= 0) {
      throw new Error("maxConcurrency must be greater than 0");
    }
  }

  /**
   * Opens an append-mode write stream at filePath after ensuring the file
   * is empty. If the file is non-empty and overwriteOutputFile is false,
   * logs an error and exits the process (preserves CLI-style behavior).
   * @param {string} filePath - Destination file path.
   * @param {string} label - Human-readable file label for the error message.
   * @returns {Promise<import("node:fs").WriteStream>}
   */
  async openOutputStream(filePath, label) {
    // A missing file counts as empty.
    const size = await fs
      .stat(filePath)
      .then((stats) => stats.size)
      .catch(() => 0);
    if (size > 0) {
      if (!this.optionalConfig.overwriteOutputFile) {
        console.error(`${label} ${filePath} already exists and overwriteOutputFile is false`);
        process.exit(1);
      }
      await fs.truncate(filePath, 0);
    }
    return createWriteStream(filePath, { flags: "a", encoding: "utf8" });
  }

  /**
   * Loads the data file, resolves derived config defaults, opens the
   * configured log/output streams, and installs signal handlers.
   * Exits the process on any initialization failure.
   */
  async initialize() {
    try {
      const dataContent = await fs.readFile(this.config.dataFilePath, "utf8");
      this.dataEntries = dataContent.trim().split("\n");
      // Default totalEntries: everything from initialIndex to the end.
      if (!this.config.totalEntries) {
        this.config.totalEntries = Math.max(0, this.dataEntries.length - this.config.initialIndex);
      }
      // Default batchSize: data length / process count, rounded up.
      if (!this.config.batchSize) {
        this.config.batchSize = Math.ceil(this.dataEntries.length / this.config.processCount);
      }
      // Default maxPendingJobs: allow two batches in flight per worker.
      if (!this.config.maxPendingJobs) {
        this.config.maxPendingJobs = this.config.batchSize * 2;
      }
      // Validate indices
      if (this.config.initialIndex >= this.dataEntries.length) {
        throw new Error(`initialIndex (${this.config.initialIndex}) is greater than or equal to data length (${this.dataEntries.length})`);
      }
      this.currentIndex = this.config.initialIndex;
      // Set up logging (plain append; the log file is never truncated).
      if (this.optionalConfig.logFilePath) {
        this.logFile = createWriteStream(this.optionalConfig.logFilePath, {
          flags: "a",
          encoding: "utf8",
        });
      }
      // Set up output and failure-output files (emptied before writing).
      if (this.optionalConfig.outputFilePath) {
        this.outputFile = await this.openOutputStream(this.optionalConfig.outputFilePath, "Output file");
      }
      if (this.optionalConfig.failureOutputFilePath) {
        this.failureOutputFile = await this.openOutputStream(this.optionalConfig.failureOutputFilePath, "Failure output file");
      }
      // Set up signal handlers so Ctrl+C / SIGTERM stop the workers.
      process.on("SIGINT", this.handleShutdown.bind(this));
      process.on("SIGTERM", this.handleShutdown.bind(this));
    }
    catch (error) {
      const errorString = error instanceof Error ? error.message : String(error);
      console.error(`Error initializing worker manager: ${errorString}`);
      process.exit(1);
    }
  }

  /**
   * Signal handler: tears the pool down once, then exits the process.
   * Idempotent — repeated signals are ignored after the first.
   */
  handleShutdown() {
    if (this.isShuttingDown)
      return;
    this.isShuttingDown = true;
    console.log("Shutting down workers...");
    this.cleanup();
    process.exit(0);
  }

  /**
   * Stops the distribution loop, SIGTERMs workers that are still alive,
   * and closes every stream this manager opened.
   */
  cleanup() {
    if (this.workerIntervalId) {
      clearInterval(this.workerIntervalId);
    }
    this.workers.forEach(({ process: proc }) => {
      if (!proc.killed) {
        proc.kill("SIGTERM");
      }
    });
    // BUGFIX: previously only logFile was ended, so outputFile and
    // failureOutputFile could be left unflushed at shutdown.
    for (const stream of [this.logFile, this.outputFile, this.failureOutputFile]) {
      if (stream?.writable) {
        stream.end();
      }
    }
  }

  /**
   * Dispatches an IPC message received from a worker.
   * Message types: "error" (log + onError + failure file), "status"
   * (update status + onStatusUpdate), "completed" (append result line),
   * "close-response" (log confirmation).
   * @param {object} message - Parsed IPC payload from the child.
   * @param {number} workerIndex - Index into this.workers.
   */
  handleIpcMessage(message, workerIndex) {
    const workerInfo = this.workers[workerIndex];
    if (!workerInfo)
      return;
    if (message.type === "error") {
      const error = message.error;
      if (!error)
        return;
      this.logError(workerIndex, error);
      this.config.onError?.(error, {
        index: workerIndex,
        pid: workerInfo.process.pid,
      });
      if (this.failureOutputFile?.writable) {
        this.failureOutputFile.write(`${error.name}: ${error.message}\n`);
      }
    }
    else if (message.type === "status") {
      workerInfo.status = message.status;
      const statuses = this.workers.map((w) => w.status);
      this.config.onStatusUpdate(statuses);
    }
    else if (message.type === "completed") {
      if (this.outputFile?.writable) {
        this.outputFile.write(message.result + "\n");
      }
    }
    else if (message.type === "close-response") {
      if (this.logFile?.writable) {
        this.logFile.write(`[${new Date().toISOString()}] Worker ${workerIndex} confirmed completion\n`);
      }
    }
  }

  /**
   * Writes a timestamped worker error to the log file, if one is open.
   * @param {number} workerIndex - Index of the reporting worker.
   * @param {{name: string, message: string}} error - Serialized error.
   */
  logError(workerIndex, error) {
    if (this.logFile?.writable) {
      const logMessage = `[${new Date().toISOString()}][Worker ${workerIndex}] Error: ${error.name} - ${error.message}`;
      this.logFile.write(logMessage + "\n");
    }
  }

  /**
   * Forks config.processCount children of workerFilePath, wires up their
   * stdout/stderr, IPC, error and exit handlers, and records them in
   * this.workers. When the last child exits, logs the duration, cleans up,
   * and invokes onComplete with the final statuses.
   */
  startChildProcesses() {
    let exitCount = 0;
    console.log(`Starting ${this.config.processCount} worker processes...`);
    const handleExit = () => {
      const duration = Date.now() - this.startedAt;
      if (this.logFile?.writable) {
        this.logFile.write(`[${new Date().toISOString()}] Job completed. Duration: ${duration}ms\n`);
      }
      this.cleanup();
      const statuses = this.workers.map((w) => w.status);
      this.config.onComplete(statuses);
    };
    for (let i = 0; i < this.config.processCount; i++) {
      const childProcess = fork(this.config.workerFilePath, {
        env: {
          ...process.env,
          ...this.config.env,
          MAX_CONCURRENCY: String(this.config.maxConcurrency),
        },
        stdio: ["inherit", "pipe", "pipe", "ipc"],
      });
      const workerInfo = {
        process: childProcess,
        status: {
          custom: {},
          started: 0,
          completed: 0,
          failed: 0,
          pending: 0,
          received: 0,
          inProgress: 0,
        },
        closeRequested: false,
      };
      childProcess.stdout?.on("data", (data) => {
        if (this.logFile?.writable) {
          this.logFile.write(`[Worker ${i}] ${data.toString()}`);
        }
      });
      // BUGFIX: stderr is piped by the stdio config above but was never
      // read; an unread pipe can fill its OS buffer and stall the child.
      childProcess.stderr?.on("data", (data) => {
        if (this.logFile?.writable) {
          this.logFile.write(`[Worker ${i}][stderr] ${data.toString()}`);
        }
      });
      childProcess.on("error", (err) => {
        console.error(`Worker ${i} error:`, err.message);
        this.config.onError({
          name: err.name,
          message: err.message,
          stack: err.stack,
        }, { index: i, pid: childProcess.pid });
      });
      childProcess.on("exit", (code, signal) => {
        // A SIGTERM exit is an intentional shutdown, not a failure.
        if (code !== 0 && signal !== "SIGTERM") {
          console.warn(`Worker ${i} exited with code ${code}, signal ${signal}`);
        }
        exitCount++;
        if (exitCount === this.config.processCount) {
          handleExit();
        }
      });
      childProcess.on("message", (message) => {
        this.handleIpcMessage(message, i);
      });
      this.workers.push(workerInfo);
    }
  }

  /**
   * Initializes the manager, launches the workers, and starts the
   * periodic loop that distributes batches of data entries.
   */
  async start() {
    await this.initialize();
    if (this.logFile?.writable) {
      this.logFile.write(`\n[${new Date().toISOString()}] Workers started\n`);
    }
    this.startedAt = Date.now();
    this.startChildProcesses();
    const initialIndex = this.config.initialIndex;
    // Last index that will be processed, inclusive.
    const stopAtIndex = initialIndex + this.config.totalEntries - 1;
    if (this.logFile?.writable) {
      // BUGFIX: previously logged `stopAtIndex + 1`, overstating the
      // inclusive range by one.
      this.logFile.write(`[${new Date().toISOString()}] Processing entries ${initialIndex} to ${stopAtIndex}. Total: ${this.config.totalEntries}\n`);
    }
    // Start the distribution loop
    this.workerIntervalId = setInterval(() => {
      this.distributeWork(stopAtIndex);
    }, this.config.tickDuration);
  }

  /**
   * One tick of the distribution loop: hands a batch of entries to every
   * live worker whose reported pending count is below maxPendingJobs.
   * Once all entries are handed out, switches to completion checking.
   * @param {number} stopAtIndex - Last entry index to distribute, inclusive.
   */
  distributeWork(stopAtIndex) {
    if (this.isShuttingDown)
      return;
    // Check if all work is distributed
    if (this.currentIndex > stopAtIndex) {
      this.checkForCompletion();
      return;
    }
    // Distribute work to available workers
    this.workers.forEach((workerInfo) => {
      // exitCode !== null means the child has already exited.
      if (workerInfo.process.exitCode !== null) {
        return;
      }
      const workerPending = workerInfo.status.pending;
      // Skip if worker is busy.
      // NOTE(review): pending is only refreshed by worker status messages;
      // between updates a worker may be handed multiple batches — confirm
      // workers report status at least once per tick.
      if (workerPending >= this.config.maxPendingJobs) {
        return;
      }
      // Clamp the batch to the entries that remain.
      const remainingEntries = stopAtIndex + 1 - this.currentIndex;
      const batchSize = Math.min(this.config.batchSize, remainingEntries);
      if (batchSize <= 0)
        return;
      const entriesBatch = this.dataEntries.slice(this.currentIndex, this.currentIndex + batchSize);
      this.currentIndex += batchSize;
      this.sendToWorker(workerInfo.process, {
        type: "entries",
        entries: entriesBatch,
      });
    });
  }

  /**
   * Sends a one-time close request to each live worker that reports no
   * pending jobs; workers are expected to exit after confirming.
   */
  checkForCompletion() {
    this.workers.forEach((workerInfo, index) => {
      if (workerInfo.process.exitCode !== null) {
        return; // Worker already exited
      }
      // Only send close request if we haven't already and worker appears idle
      if (!workerInfo.closeRequested && workerInfo.status.pending <= 0) {
        this.sendToWorker(workerInfo.process, { type: "close-request" });
        workerInfo.closeRequested = true;
        if (this.logFile?.writable) {
          this.logFile.write(`[${new Date().toISOString()}] Sent close request to Worker ${index}\n`);
        }
      }
    });
  }

  /**
   * Sends an IPC message, silently dropping it if the channel is gone.
   * @param {object} worker - The child process to message.
   * @param {object} message - JSON-serializable IPC payload.
   */
  sendToWorker(worker, message) {
    if (worker.connected && !worker.killed) {
      worker.send(message);
    }
  }
}
/**
 * Spawns a pool of worker processes to process data entries.
 *
 * @param {object} config - Configuration forwarded to WorkerManager.
 * @returns {Promise<void>} Resolves once the manager has started.
 */
export async function spawnWorkers(config) {
  await new WorkerManager(config).start();
}
//# sourceMappingURL=spawnWorkers.js.map