fortify2-js
The most powerful JavaScript security library: military-grade cryptography, 19 enhanced object methods, quantum-resistant algorithms, and complete TypeScript support. More powerful than Lodash, with security built in.
1,188 lines (1,182 loc) • 46.4 kB
JavaScript
'use strict';
var cluster = require('cluster');
var os = require('os');
var events = require('events');
var pidusage = require('pidusage');
var errorHandler = require('../../../../utils/errorHandler.js');
var index = require('../../../../components/fortified-function/index.js');
var Logger = require('../../server/utils/Logger.js');
function _interopNamespaceDefault(e) {
var n = Object.create(null);
if (e) {
Object.keys(e).forEach(function (k) {
if (k !== 'default') {
var d = Object.getOwnPropertyDescriptor(e, k);
Object.defineProperty(n, k, d.get ? d : {
enumerable: true,
get: function () { return e[k]; }
});
}
});
}
n.default = e;
return Object.freeze(n);
}
var cluster__namespace = /*#__PURE__*/_interopNamespaceDefault(cluster);
var os__namespace = /*#__PURE__*/_interopNamespaceDefault(os);
/**
* FortifyJS Worker Manager
* Production-ready worker process lifecycle management with advanced monitoring
*/
/**
* Production-grade worker process manager with intelligent lifecycle management
*/
class WorkerManager extends events.EventEmitter {
constructor(config, errorLogger) {
super();
this.workers = new Map();
this.workerMetrics = new Map();
this.restartCounts = new Map();
this.lastRestartTime = new Map();
this.isShuttingDown = false;
this.shutdownPromise = null;
if (!cluster__namespace.isPrimary && !cluster__namespace.isMaster) {
throw new Error("WorkerManager can only be instantiated in the primary process");
}
this.config = config;
this.errorLogger = errorLogger;
// Initialize optimized worker pool
const optimalWorkerCount = this.getOptimalWorkerCount();
this.workerPool = {
active: new Map(),
pending: new Set(),
draining: new Set(),
dead: new Map(),
maxSize: Math.min(optimalWorkerCount, this.config.autoScaling?.maxWorkers || 32),
currentSize: 0,
targetSize: optimalWorkerCount,
};
this.setupClusterEventHandlers();
this.monitoringInterval = this.setupWorkerMonitoring();
// Handle process cleanup
process.once("SIGTERM", () => this.gracefulShutdown());
process.once("SIGINT", () => this.gracefulShutdown());
}
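// Configuration sketch: the shape below is inferred from the fields this class reads
// (workers, autoScaling.maxWorkers, monitoring.metricsInterval, processManagement.*);
// it is not a documented schema.
//
//   {
//       workers: "auto",                       // or a fixed number
//       autoScaling: { maxWorkers: 16 },
//       monitoring: { metricsInterval: 5000 },
//       processManagement: {
//           respawn: true,
//           maxRestarts: 5,
//           restartDelay: 1000,
//           gracefulShutdownTimeout: 30000,
//       },
//   }
//
// The errorLogger argument is expected to expose a logError(error) method.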
/**
* Calculate optimal worker count with system resource consideration
*/
getOptimalWorkerCount() {
const configWorkers = this.config.workers;
const cpuCount = os__namespace.cpus().length;
const totalMemory = os__namespace.totalmem();
const freeMemory = os__namespace.freemem();
if (typeof configWorkers === "number") {
return Math.max(1, Math.min(configWorkers, cpuCount * 2));
}
if (configWorkers === "auto") {
// Advanced auto-scaling based on system resources
const memoryUtilization = 1 - freeMemory / totalMemory;
let workerCount = cpuCount;
// Scale down if memory usage is high
if (memoryUtilization > 0.8) {
workerCount = Math.ceil(cpuCount * 0.5);
}
else if (memoryUtilization > 0.6) {
workerCount = Math.ceil(cpuCount * 0.75);
}
// Always reserve one CPU for the primary process
return Math.max(1, Math.min(workerCount - 1, 16));
}
return Math.max(1, cpuCount - 1);
}
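// Worked example (assuming an 8-CPU host with workers: "auto"): at 70% memory
// utilization the count is ceil(8 * 0.75) = 6, minus one CPU reserved for the
// primary, so 5 workers; above 80% utilization it drops to ceil(8 * 0.5) - 1 = 3,
// and below 60% it is simply 8 - 1 = 7 (always capped at 16).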
/**
* Setup cluster event handlers with proper error handling
*/
setupClusterEventHandlers() {
const safeHandler = (handler) => index.func(handler, {
ultraFast: "maximum",
auditLog: true,
errorHandling: "graceful",
});
cluster__namespace.on("fork", safeHandler((worker) => {
this.handleWorkerEvent("fork", worker);
}));
cluster__namespace.on("online", safeHandler((worker) => {
this.handleWorkerEvent("online", worker);
}));
cluster__namespace.on("listening", safeHandler((worker, address) => {
this.handleWorkerEvent("listening", worker);
}));
cluster__namespace.on("disconnect", safeHandler((worker) => {
this.handleWorkerEvent("disconnect", worker);
}));
cluster__namespace.on("exit", safeHandler((worker, code, signal) => {
this.handleWorkerEvent("exit", worker, code, signal);
}));
// Handle worker messages for IPC communication
cluster__namespace.on("message", safeHandler((worker, message) => {
this.handleWorkerMessage(worker, message);
}));
}
/**
* Setup optimized real-time worker monitoring
*/
setupWorkerMonitoring() {
return setInterval(async () => {
if (this.isShuttingDown)
return;
try {
await this.updateWorkerMetrics();
this.performHealthChecks();
this.optimizeWorkerPool();
}
catch (error) {
const securityError = errorHandler.createSecurityError(`Worker monitoring failed: ${error.message}`, errorHandler.ErrorType.INTERNAL, errorHandler.ErrorSeverity.MEDIUM, "WORKER_MONITORING_ERROR", { operation: "worker_monitoring" });
this.errorLogger.logError(securityError);
}
}, this.config.monitoring?.metricsInterval || 5000);
}
/**
* Wait for worker to come online with timeout
*/
async waitForWorkerOnline(worker, workerId) {
return new Promise((resolve, reject) => {
const timeout = setTimeout(() => {
reject(new Error(`Worker ${workerId} startup timeout`));
}, 15000); // 15 second startup timeout
const cleanup = () => {
clearTimeout(timeout);
worker.removeListener("online", onOnline);
worker.removeListener("error", onError);
worker.removeListener("exit", onExit);
};
const onOnline = () => {
cleanup();
resolve();
};
const onError = (error) => {
cleanup();
reject(error);
};
const onExit = (code, signal) => {
cleanup();
reject(new Error(`Worker ${workerId} exited during startup: code=${code}, signal=${signal}`));
};
worker.once("online", onOnline);
worker.once("error", onError);
worker.once("exit", onExit);
});
}
/**
* Graceful shutdown of all workers
*/
async gracefulShutdown() {
if (this.shutdownPromise) {
return this.shutdownPromise;
}
this.shutdownPromise = this.performGracefulShutdown();
return this.shutdownPromise;
}
/**
* Perform the actual graceful shutdown
*/
async performGracefulShutdown() {
if (this.isShuttingDown)
return;
this.isShuttingDown = true;
Logger.logger.info("cluster", "Starting graceful shutdown...");
// Stop monitoring
if (this.monitoringInterval) {
clearInterval(this.monitoringInterval);
}
// Stop accepting new requests
this.emit("shutdown:started");
const timeout = this.config.processManagement?.gracefulShutdownTimeout || 30000;
const workers = Array.from(this.workers.values());
if (workers.length === 0) {
Logger.logger.info("cluster", "No workers to shutdown");
return;
}
Logger.logger.info("cluster", `Shutting down ${workers.length} workers with ${timeout}ms timeout`);
// Phase 1: Signal workers to stop accepting new requests
await this.signalWorkersToStopAccepting(workers);
// Phase 2: Wait for workers to finish current requests
await this.waitForWorkersToFinish(workers, timeout * 0.8);
// Phase 3: Force shutdown remaining workers
await this.forceShutdownRemainingWorkers(workers);
Logger.logger.info("cluster", "Graceful shutdown completed");
}
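// Timing sketch with the default 30000ms gracefulShutdownTimeout: phase 1 signals
// every worker to drain (roughly 100ms, in parallel), phase 2 waits up to
// 0.8 * 30000 = 24000ms for in-flight work to finish, and phase 3 sends SIGTERM to
// any survivors and escalates to SIGKILL after a further 5000ms.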
/**
* Signal workers to stop accepting new requests
*/
async signalWorkersToStopAccepting(workers) {
const promises = workers.map((worker) => {
return new Promise((resolve) => {
try {
worker.send({ type: "shutdown", phase: "drain" });
// Give workers a moment to process the signal
setTimeout(resolve, 100);
}
catch (error) {
// Worker might be dead, continue
resolve();
}
});
});
await Promise.all(promises);
Logger.logger.info("cluster", "Signaled workers to drain connections");
}
/**
* Wait for workers to finish processing current requests
*/
async waitForWorkersToFinish(workers, timeout) {
const startTime = Date.now();
const activeWorkers = new Set(workers);
return new Promise((resolve) => {
const checkInterval = setInterval(() => {
const elapsed = Date.now() - startTime;
// Remove workers that have already exited
Array.from(activeWorkers).forEach((worker) => {
if (worker.isDead()) {
activeWorkers.delete(worker);
}
});
// Check if all workers are done or timeout reached
if (activeWorkers.size === 0 || elapsed >= timeout) {
clearInterval(checkInterval);
Logger.logger.info("cluster", `Graceful phase completed: ${workers.length - activeWorkers.size}/${workers.length} workers finished`);
resolve();
}
}, 500);
// Start graceful disconnect
Array.from(activeWorkers).forEach((worker) => {
try {
worker.disconnect();
}
catch (error) {
// Worker might be dead, continue
}
});
});
}
/**
* Force shutdown any remaining workers
*/
async forceShutdownRemainingWorkers(workers) {
const remainingWorkers = workers.filter((worker) => !worker.isDead());
if (remainingWorkers.length === 0)
return;
Logger.logger.warn("cluster", `Force shutting down ${remainingWorkers.length} remaining workers`);
const promises = remainingWorkers.map((worker) => {
return new Promise((resolve) => {
const forceTimeout = setTimeout(() => {
try {
worker.kill("SIGKILL");
}
catch (error) {
// Ignore errors
}
resolve();
}, 5000);
worker.once("exit", () => {
clearTimeout(forceTimeout);
resolve();
});
try {
worker.kill("SIGTERM");
}
catch (error) {
clearTimeout(forceTimeout);
resolve();
}
});
});
await Promise.all(promises);
}
/**
* Stop all workers (legacy method for backward compatibility)
*/
async stopAllWorkers(graceful = true) {
if (graceful) {
return this.gracefulShutdown();
}
else {
return this.forceShutdownAllWorkers();
}
}
/**
* Force shutdown all workers immediately
*/
async forceShutdownAllWorkers() {
this.isShuttingDown = true;
const workers = Array.from(this.workers.values());
const promises = workers.map((worker) => this.forceStopWorker(worker));
await Promise.allSettled(promises);
}
/**
* Force stop individual worker
*/
async forceStopWorker(worker) {
return new Promise((resolve) => {
const timer = setTimeout(() => {
try {
worker.kill("SIGKILL");
}
catch (error) {
// Ignore errors
}
resolve();
}, 2000);
worker.once("exit", () => {
clearTimeout(timer);
resolve();
});
try {
worker.kill("SIGTERM");
}
catch (error) {
clearTimeout(timer);
resolve();
}
});
}
/**
* Get all active workers with enhanced filtering
*/
getActiveWorkers() {
return Array.from(this.workerPool.active.values()).filter((metrics) => metrics.health.status !== "dead");
}
/**
* Get healthy workers only
*/
getHealthyWorkers() {
return this.getActiveWorkers().filter((metrics) => metrics.health.status === "healthy" &&
metrics.health.healthScore >= 80);
}
/**
* Get worker by ID with validation
*/
getWorker(workerId) {
return this.workerMetrics.get(workerId) || null;
}
/**
* Get comprehensive worker pool status
*/
getWorkerPool() {
const activeWorkers = this.getActiveWorkers();
const healthy = activeWorkers.filter((w) => w.health.status === "healthy").length;
const unhealthy = activeWorkers.length - healthy;
const avgHealthScore = activeWorkers.length > 0
? activeWorkers.reduce((sum, w) => sum + w.health.healthScore, 0) / activeWorkers.length
: 0;
const totalRestarts = Array.from(this.restartCounts.values()).reduce((sum, count) => sum + count, 0);
return {
...this.workerPool,
healthy,
unhealthy,
avgHealthScore: Math.round(avgHealthScore),
totalRestarts,
};
}
/**
* Start a single worker with enhanced error handling
*/
async startSingleWorker() {
if (!cluster__namespace.isPrimary && !cluster__namespace.isMaster) {
throw new Error("Cannot start worker from worker process");
}
if (this.isShuttingDown) {
throw new Error("Cannot start worker during shutdown");
}
if (this.workerPool.currentSize >= this.workerPool.maxSize) {
throw new Error(`Maximum worker limit reached (${this.workerPool.maxSize})`);
}
try {
const worker = cluster__namespace.fork();
const workerId = this.getWorkerId(worker);
// Wait for worker to come online
await this.waitForWorkerOnline(worker, workerId);
Logger.logger.info("cluster", `Started single worker: ${workerId}`);
return workerId;
}
catch (error) {
const securityError = errorHandler.createSecurityError(`Failed to start single worker: ${error.message}`, errorHandler.ErrorType.INTERNAL, errorHandler.ErrorSeverity.HIGH, "SINGLE_WORKER_START_ERROR", { operation: "start_single_worker" });
this.errorLogger.logError(securityError);
throw error;
}
}
/**
* Stop a specific worker with enhanced control
*/
async stopSingleWorker(workerId, graceful = true) {
const worker = this.workers.get(workerId);
if (!worker) {
throw new Error(`Worker ${workerId} not found`);
}
if (worker.isDead()) {
Logger.logger.warn("cluster", `Worker ${workerId} is already dead`);
return;
}
const timeout = this.config.processManagement?.gracefulShutdownTimeout || 30000;
try {
if (graceful) {
await this.gracefulStopWorker(worker, workerId, timeout);
}
else {
await this.forceStopWorker(worker);
}
Logger.logger.info("cluster", `Stopped worker: ${workerId}`);
}
catch (error) {
const securityError = errorHandler.createSecurityError(`Failed to stop worker ${workerId}: ${error.message}`, errorHandler.ErrorType.INTERNAL, errorHandler.ErrorSeverity.MEDIUM, "SINGLE_WORKER_STOP_ERROR", { operation: "stop_single_worker" });
this.errorLogger.logError(securityError);
throw error;
}
}
/**
* Gracefully stop a single worker
*/
async gracefulStopWorker(worker, workerId, timeout) {
return new Promise((resolve, reject) => {
const timer = setTimeout(() => {
try {
worker.kill("SIGKILL");
}
catch (error) {
// Ignore errors
}
reject(new Error(`Worker ${workerId} graceful stop timeout`));
}, timeout);
worker.once("disconnect", () => {
clearTimeout(timer);
resolve();
});
worker.once("exit", () => {
clearTimeout(timer);
resolve();
});
try {
// Signal worker to drain connections
worker.send({ type: "shutdown", phase: "drain" });
// Disconnect after a brief delay
setTimeout(() => {
try {
worker.disconnect();
}
catch (error) {
// Worker might have already disconnected
}
}, 1000);
}
catch (error) {
clearTimeout(timer);
reject(error);
}
});
}
/**
* Scale worker pool to target size
*/
async scaleWorkers(targetSize) {
if (!cluster__namespace.isPrimary && !cluster__namespace.isMaster) {
throw new Error("Cannot scale workers from worker process");
}
if (this.isShuttingDown) {
throw new Error("Cannot scale workers during shutdown");
}
const clampedTarget = Math.max(1, Math.min(targetSize, this.workerPool.maxSize));
const currentSize = this.workerPool.currentSize;
if (clampedTarget === currentSize) {
Logger.logger.info("cluster", `Worker pool already at target size: ${currentSize}`);
return;
}
Logger.logger.info("cluster", `Scaling worker pool from ${currentSize} to ${clampedTarget} workers`);
if (clampedTarget > currentSize) {
// Scale up
const workersToAdd = clampedTarget - currentSize;
await this.startWorkers(workersToAdd);
}
else {
// Scale down
const workersToRemove = currentSize - clampedTarget;
await this.scaleDownWorkers(workersToRemove);
}
this.workerPool.targetSize = clampedTarget;
Logger.logger.info("cluster", `Worker pool scaled to ${this.workerPool.currentSize} workers`);
}
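// Usage sketch (manager is an illustrative instance name): the target is clamped to
// [1, maxSize], so with maxSize 16
//
//   await manager.scaleWorkers(24); // clamped to 16, forks new workers in batches
//   await manager.scaleWorkers(4);  // gracefully stops the least healthy workers first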
/**
* Scale down workers by removing the least healthy ones
*/
async scaleDownWorkers(count) {
const activeWorkers = this.getActiveWorkers()
.sort((a, b) => a.health.healthScore - b.health.healthScore) // Least healthy first
.slice(0, count);
const promises = activeWorkers.map((worker) => this.stopSingleWorker(worker.workerId, true));
await Promise.allSettled(promises);
}
/**
* Get comprehensive worker statistics
*/
getWorkerStats() {
const workers = Array.from(this.workerMetrics.values());
const stats = {
total: workers.length,
active: workers.filter((w) => w.health.status !== "dead").length,
healthy: workers.filter((w) => w.health.status === "healthy")
.length,
warning: workers.filter((w) => w.health.status === "warning")
.length,
critical: workers.filter((w) => w.health.status === "critical")
.length,
dead: workers.filter((w) => w.health.status === "dead").length,
totalRequests: workers.reduce((sum, w) => sum + w.requests.total, 0),
totalErrors: workers.reduce((sum, w) => sum + w.requests.errors, 0),
avgResponseTime: 0,
avgCpuUsage: 0,
avgMemoryUsage: 0,
totalRestarts: Array.from(this.restartCounts.values()).reduce((sum, count) => sum + count, 0),
};
const activeWorkers = workers.filter((w) => w.health.status !== "dead");
if (activeWorkers.length > 0) {
stats.avgResponseTime = Math.round(activeWorkers.reduce((sum, w) => sum + w.requests.averageResponseTime, 0) / activeWorkers.length);
stats.avgCpuUsage = Math.round(activeWorkers.reduce((sum, w) => sum + w.cpu.usage, 0) /
activeWorkers.length);
stats.avgMemoryUsage = Math.round(activeWorkers.reduce((sum, w) => sum + w.memory.percentage, 0) /
activeWorkers.length);
}
return stats;
}
/**
* Send message to all workers
*/
broadcastToWorkers(message) {
this.workers.forEach((worker, workerId) => {
try {
if (!worker.isDead()) {
worker.send(message);
}
}
catch (error) {
Logger.logger.warn("cluster", `Failed to send message to worker ${workerId}: ${error}`);
}
});
}
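// IPC sketch: broadcast messages arrive in each worker on the standard
// process.on("message", ...) channel. A hedged example of how a worker might honour
// the drain signal sent during shutdown (the server variable is assumed, not part of
// this file):
//
//   process.on("message", (msg) => {
//       if (msg && msg.type === "shutdown" && msg.phase === "drain") {
//           server.close(); // stop accepting new connections, let in-flight requests finish
//       }
//   });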
/**
* Send message to a specific worker
*/
sendToWorker(workerId, message) {
const worker = this.workers.get(workerId);
if (!worker || worker.isDead()) {
return false;
}
try {
worker.send(message);
return true;
}
catch (error) {
Logger.logger.warn("cluster", `Failed to send message to worker ${workerId}: ${error}`);
return false;
}
}
/**
* Clean up resources and stop monitoring
*/
async destroy() {
await this.gracefulShutdown();
if (this.monitoringInterval) {
clearInterval(this.monitoringInterval);
}
this.removeAllListeners();
this.workers.clear();
this.workerMetrics.clear();
this.restartCounts.clear();
this.lastRestartTime.clear();
Logger.logger.info("cluster", "WorkerManager destroyed");
}
/**
* Handle worker IPC messages with enhanced processing
*/
handleWorkerMessage(worker, message) {
if (!message || typeof message !== "object")
return;
const workerId = this.getWorkerId(worker);
try {
switch (message.type) {
case "metrics_update":
this.updateWorkerMetricsFromMessage(workerId, message.data);
break;
case "health_check":
this.updateWorkerHealth(workerId, message.data);
break;
case "request_stats":
this.updateRequestStats(workerId, message.data);
break;
case "memory_warning":
this.handleMemoryWarning(workerId, message.data);
break;
default:
// Emit for custom message handling
this.emit("worker:message", workerId, message);
}
}
catch (error) {
const securityError = errorHandler.createSecurityError(`Worker message processing failed: ${error.message}`, errorHandler.ErrorType.INTERNAL, errorHandler.ErrorSeverity.MEDIUM, "WORKER_MESSAGE_ERROR", { operation: "handle_worker_message" });
this.errorLogger.logError(securityError);
}
}
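// Reporting sketch: workers feed this handler by posting messages whose type matches
// one of the cases above. Field names below follow what updateRequestStats and
// handleMemoryWarning read; the values are illustrative:
//
//   process.send({
//       type: "request_stats",
//       data: { total: 1520, errors: 12, averageResponseTime: 84, activeRequests: 3 },
//   });
//   process.send({
//       type: "memory_warning",
//       data: { message: "heap above threshold", usage: 0.92 },
//   });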
/**
* Update worker metrics using efficient batch processing
*/
async updateWorkerMetrics() {
const activeWorkers = Array.from(this.workers.entries());
if (activeWorkers.length === 0)
return;
// Batch collect PIDs
const pidMap = new Map();
activeWorkers.forEach(([workerId, worker]) => {
if (worker.process?.pid) {
pidMap.set(worker.process.pid, workerId);
}
});
if (pidMap.size === 0)
return;
try {
// Batch pidusage call for efficiency
const pids = Array.from(pidMap.keys());
const statsMap = await pidusage(pids);
// Process stats in batch
Object.entries(statsMap).forEach(([pidStr, stats]) => {
const pid = parseInt(pidStr, 10);
const workerId = pidMap.get(pid);
if (workerId && stats) {
this.updateWorkerMetricsFromStats(workerId, stats);
}
});
}
catch (error) {
// Handle partial failures gracefully
Logger.logger.warn("cluster", `Batch metrics collection failed: ${error.message}`);
// Fallback to individual collection for critical workers
await this.fallbackMetricsCollection(activeWorkers);
}
}
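// Note: pidusage accepts an array of PIDs and resolves to an object keyed by PID,
// which is why the batched stats above are matched back to workers through pidMap.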
/**
* Fallback metrics collection for individual workers
*/
async fallbackMetricsCollection(workers) {
const promises = workers.map(async ([workerId, worker]) => {
try {
if (!worker.process?.pid)
return;
const stats = await pidusage(worker.process.pid);
this.updateWorkerMetricsFromStats(workerId, stats);
}
catch (error) {
this.handleWorkerMetricsError(workerId);
}
});
await Promise.allSettled(promises);
}
/**
* Update worker metrics from pidusage stats
*/
updateWorkerMetricsFromStats(workerId, stats) {
const metrics = this.workerMetrics.get(workerId);
if (!metrics)
return;
const now = Date.now();
// Update CPU metrics with smoothing
const prevCpuUsage = metrics.cpu.usage;
metrics.cpu.usage = this.smoothValue(prevCpuUsage, stats.cpu, 0.3);
metrics.cpu.average = metrics.cpu.average * 0.9 + stats.cpu * 0.1;
metrics.cpu.peak = Math.max(metrics.cpu.peak, stats.cpu);
// Update memory metrics
metrics.memory.usage = stats.memory;
metrics.memory.peak = Math.max(metrics.memory.peak, stats.memory);
metrics.memory.percentage = (stats.memory / os__namespace.totalmem()) * 100;
// Update uptime efficiently
const worker = this.workers.get(workerId);
if (worker?.process) {
const startTime = worker.process.spawndate?.getTime() ||
this.lastRestartTime.get(workerId)?.getTime() ||
now;
metrics.uptime = now - startTime;
}
// Calculate health score
this.calculateHealthScore(metrics);
// Update last check
metrics.health.lastCheck = new Date();
}
/**
* Smooth value changes to avoid metric spikes
*/
smoothValue(oldValue, newValue, factor) {
return oldValue * (1 - factor) + newValue * factor;
}
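// Worked example: with a factor of 0.3 this is an exponential moving average. If the
// previous CPU reading was 40% and the new sample is 90%, the smoothed value is
// 40 * 0.7 + 90 * 0.3 = 55, so a single spike only moves the metric part of the way.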
/**
* Handle worker metrics collection errors
*/
handleWorkerMetricsError(workerId) {
const metrics = this.workerMetrics.get(workerId);
if (metrics) {
metrics.health.status = "critical";
metrics.health.consecutiveFailures++;
// Consider worker dead if too many consecutive failures
if (metrics.health.consecutiveFailures > 3) {
this.emit("worker:unresponsive", workerId);
}
}
}
/**
* Update worker metrics from IPC message
*/
updateWorkerMetricsFromMessage(workerId, data) {
const metrics = this.workerMetrics.get(workerId);
if (!metrics || !data)
return;
// Safely merge metrics data
if (data.requests && typeof data.requests === "object") {
Object.assign(metrics.requests, data.requests);
}
if (data.network && typeof data.network === "object") {
Object.assign(metrics.network, data.network);
}
if (data.gc && typeof data.gc === "object") {
Object.assign(metrics.gc, data.gc);
}
if (data.eventLoop && typeof data.eventLoop === "object") {
Object.assign(metrics.eventLoop, data.eventLoop);
}
if (data.memory && typeof data.memory === "object") {
// Merge additional memory stats from worker
Object.assign(metrics.memory, data.memory);
}
}
/**
* Update request statistics
*/
updateRequestStats(workerId, data) {
const metrics = this.workerMetrics.get(workerId);
if (!metrics || !data)
return;
// Update request metrics with validation
if (typeof data.total === "number")
metrics.requests.total = data.total;
if (typeof data.errors === "number")
metrics.requests.errors = data.errors;
if (typeof data.averageResponseTime === "number") {
metrics.requests.averageResponseTime = data.averageResponseTime;
}
if (typeof data.activeRequests === "number") {
metrics.requests.activeRequests = data.activeRequests;
}
}
/**
* Handle memory warning from worker
*/
handleMemoryWarning(workerId, data) {
const metrics = this.workerMetrics.get(workerId);
if (!metrics)
return;
Logger.logger.warn("cluster", `Memory warning from worker ${workerId}: ${data.message}`);
metrics.health.status = "warning";
// Consider restarting worker if memory usage is critical
if (data.usage > 0.9) {
this.emit("worker:memory_critical", workerId, data);
}
}
/**
* Update worker health from health check
*/
updateWorkerHealth(workerId, healthData) {
const metrics = this.workerMetrics.get(workerId);
if (!metrics || !healthData)
return;
metrics.health.status = healthData.status || "healthy";
metrics.health.lastCheck = new Date();
metrics.health.healthScore = Math.max(0, Math.min(100, healthData.score || 100));
if (healthData.status === "healthy") {
metrics.health.consecutiveFailures = 0;
}
else {
metrics.health.consecutiveFailures++;
}
}
/**
* Calculate comprehensive health score
*/
calculateHealthScore(metrics) {
let score = 100;
// CPU utilization penalty
if (metrics.cpu.usage > 90)
score -= 30;
else if (metrics.cpu.usage > 80)
score -= 20;
else if (metrics.cpu.usage > 60)
score -= 10;
// Memory utilization penalty
if (metrics.memory.percentage > 95)
score -= 40;
else if (metrics.memory.percentage > 90)
score -= 30;
else if (metrics.memory.percentage > 70)
score -= 15;
// Error rate penalty
if (metrics.requests.total > 0) {
const errorRate = (metrics.requests.errors / metrics.requests.total) * 100;
if (errorRate > 15)
score -= 35;
else if (errorRate > 10)
score -= 25;
else if (errorRate > 5)
score -= 10;
}
// Response time penalty
if (metrics.requests.averageResponseTime > 5000)
score -= 25;
else if (metrics.requests.averageResponseTime > 2000)
score -= 15;
else if (metrics.requests.averageResponseTime > 1000)
score -= 5;
// Event loop delay penalty
if (metrics.eventLoop.delay > 100)
score -= 20;
else if (metrics.eventLoop.delay > 50)
score -= 10;
// Consecutive failures penalty
score -= metrics.health.consecutiveFailures * 10;
metrics.health.healthScore = Math.max(0, score);
// Update health status based on score
if (score >= 80) {
metrics.health.status = "healthy";
}
else if (score >= 50) {
metrics.health.status = "warning";
}
else {
metrics.health.status = "critical";
}
}
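// Worked example: a worker at 85% CPU (-20), 75% memory (-15), a 6% error rate (-10),
// a 1200ms average response time (-5), a 60ms event loop delay (-10) and no
// consecutive failures scores 100 - 60 = 40, which falls in the "critical" band
// (below 50).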
/**
* Perform health checks and trigger actions
*/
performHealthChecks() {
this.workerMetrics.forEach((metrics, workerId) => {
// Check for unresponsive workers
const timeSinceLastCheck = Date.now() - metrics.health.lastCheck.getTime();
if (timeSinceLastCheck > 30000) {
// 30 seconds
metrics.health.status = "critical";
metrics.health.consecutiveFailures++;
}
// Auto-restart critically unhealthy workers
if (metrics.health.healthScore < 20 &&
metrics.health.consecutiveFailures > 5 &&
!this.isShuttingDown) {
this.emit("worker:auto_restart_needed", workerId);
this.restartUnhealthyWorker(workerId);
}
});
}
/**
* Optimize worker pool based on current load and health
*/
optimizeWorkerPool() {
if (this.isShuttingDown)
return;
const activeWorkers = this.getActiveWorkers();
// Guard against an empty pool to avoid NaN averages
if (activeWorkers.length === 0)
return;
const avgCpuUsage = activeWorkers.reduce((sum, w) => sum + w.cpu.usage, 0) /
activeWorkers.length;
const avgMemoryUsage = activeWorkers.reduce((sum, w) => sum + w.memory.percentage, 0) /
activeWorkers.length;
// Scale up if high utilization
if (avgCpuUsage > 80 &&
this.workerPool.currentSize < this.workerPool.maxSize) {
this.emit("worker:scale_up_needed");
}
// Scale down if low utilization (but keep minimum workers)
else if (avgCpuUsage < 30 &&
avgMemoryUsage < 50 &&
this.workerPool.currentSize >
Math.max(2, this.getOptimalWorkerCount() / 2)) {
this.emit("worker:scale_down_possible");
}
}
/**
* Restart unhealthy worker
*/
async restartUnhealthyWorker(workerId) {
try {
Logger.logger.warn("cluster", `Restarting unhealthy worker: ${workerId}`);
await this.stopSingleWorker(workerId, false);
await this.startSingleWorker();
}
catch (error) {
Logger.logger.error("cluster", `Failed to restart unhealthy worker ${workerId}: ${error.message}`);
}
}
/**
* Handle worker events with comprehensive monitoring
*/
async handleWorkerEvent(eventType, worker, code, signal) {
const workerId = this.getWorkerId(worker);
try {
switch (eventType) {
case "fork":
await this.handleWorkerFork(workerId, worker);
break;
case "online":
await this.handleWorkerOnline(workerId, worker);
break;
case "listening":
await this.handleWorkerListening(workerId, worker);
break;
case "disconnect":
await this.handleWorkerDisconnect(workerId, worker);
break;
case "exit":
await this.handleWorkerExit(workerId, worker, code, signal);
break;
}
}
catch (error) {
const securityError = errorHandler.createSecurityError(`Worker event handling failed: ${error.message}`, errorHandler.ErrorType.INTERNAL, errorHandler.ErrorSeverity.HIGH, "WORKER_EVENT_ERROR", { operation: "handle_worker_event" });
this.errorLogger.logError(securityError);
}
}
/**
* Handle worker fork event
*/
async handleWorkerFork(workerId, worker) {
this.workers.set(workerId, worker);
this.workerPool.pending.add(workerId);
// Initialize comprehensive worker metrics
const metrics = {
workerId,
pid: worker.process.pid || 0,
uptime: 0,
restarts: this.restartCounts.get(workerId) || 0,
lastRestart: this.lastRestartTime.get(workerId),
cpu: { usage: 0, average: 0, peak: 0 },
memory: {
usage: 0,
peak: 0,
percentage: 0,
heapUsed: 0,
heapTotal: 0,
external: 0,
},
network: {
connections: 0,
bytesReceived: 0,
bytesSent: 0,
connectionsPerSecond: 0,
},
requests: {
total: 0,
perSecond: 0,
errors: 0,
averageResponseTime: 0,
p95ResponseTime: 0,
p99ResponseTime: 0,
activeRequests: 0,
queuedRequests: 0,
},
health: {
status: "healthy",
lastCheck: new Date(),
consecutiveFailures: 0,
healthScore: 100,
},
gc: { collections: 0, timeSpent: 0, averageTime: 0 },
eventLoop: { delay: 0, utilization: 0 },
};
this.workerMetrics.set(workerId, metrics);
this.emit("worker:started", workerId, worker.process.pid || 0);
Logger.logger.info("cluster", `Worker ${workerId} forked with PID ${worker.process.pid}`);
}
/**
* Handle worker online event
*/
async handleWorkerOnline(workerId, worker) {
this.workerPool.pending.delete(workerId);
const metrics = this.workerMetrics.get(workerId);
if (metrics) {
metrics.health.status = "healthy";
this.workerPool.active.set(workerId, metrics);
this.workerPool.currentSize++;
}
Logger.logger.info("cluster", `Worker ${workerId} is online`);
}
/**
* Handle worker listening event
*/
async handleWorkerListening(workerId, worker) {
const metrics = this.workerMetrics.get(workerId);
if (metrics) {
metrics.health.status = "healthy";
metrics.health.lastCheck = new Date();
}
Logger.logger.info("cluster", `Worker ${workerId} is listening`);
}
/**
* Handle worker disconnect event
*/
async handleWorkerDisconnect(workerId, worker) {
this.workerPool.active.delete(workerId);
this.workerPool.draining.add(workerId);
const metrics = this.workerMetrics.get(workerId);
if (metrics) {
metrics.health.status = "critical";
}
Logger.logger.warn("cluster", `Worker ${workerId} disconnected`);
}
/**
* Handle worker exit event with intelligent restart logic
*/
async handleWorkerExit(workerId, worker, code, signal) {
// Clean up worker references
this.workers.delete(workerId);
this.workerPool.active.delete(workerId);
this.workerPool.pending.delete(workerId);
this.workerPool.draining.delete(workerId);
this.workerPool.currentSize = Math.max(0, this.workerPool.currentSize - 1);
// Record death information
this.workerPool.dead.set(workerId, {
diedAt: new Date(),
reason: signal ? `Signal: ${signal}` : `Exit code: ${code}`,
exitCode: code,
signal,
restartCount: this.restartCounts.get(workerId) || 0,
});
// Update metrics
const metrics = this.workerMetrics.get(workerId);
if (metrics) {
metrics.health.status = "dead";
}
this.emit("worker:died", workerId, code || 0, signal || "");
Logger.logger.error("cluster", `Worker ${workerId} exited with code ${code} and signal ${signal}`);
// Attempt restart if conditions are met
if (this.shouldRestartWorker(workerId, code, signal) &&
!this.isShuttingDown) {
await this.restartWorker(workerId);
}
}
/**
* Enhanced worker restart decision logic
*/
shouldRestartWorker(workerId, code, signal) {
const respawnConfig = this.config.processManagement?.respawn;
if (respawnConfig === false)
return false;
const maxRestarts = this.config.processManagement?.maxRestarts || 5;
const currentRestarts = this.restartCounts.get(workerId) || 0;
if (currentRestarts >= maxRestarts) {
Logger.logger.warn("cluster", `Worker ${workerId} exceeded max restarts (${maxRestarts})`);
return false;
}
// Don't restart on intentional shutdown
if (signal === "SIGTERM" ||
signal === "SIGINT" ||
signal === "SIGKILL") {
return false;
}
// Don't restart on successful exit
if (code === 0) {
return false;
}
// Check restart rate limiting
const lastRestart = this.lastRestartTime.get(workerId);
if (lastRestart) {
const timeSinceRestart = Date.now() - lastRestart.getTime();
const minRestartInterval = 10000; // 10 seconds minimum between restarts
if (timeSinceRestart < minRestartInterval) {
Logger.logger.warn("cluster", `Worker ${workerId} restart rate limited`);
return false;
}
}
return true;
}
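// Decision sketch: a worker that exits cleanly (code 0) or is killed with SIGTERM,
// SIGINT or SIGKILL is never respawned; a crashing worker is respawned only while it
// is under the maxRestarts budget and at least 10 seconds have passed since its
// previous restart.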
/**
* Restart worker with exponential backoff
*/
async restartWorker(workerId) {
const currentRestarts = this.restartCounts.get(workerId) || 0;
const baseDelay = this.config.processManagement?.restartDelay || 1000;
// Exponential backoff: delay = baseDelay * 2^restarts (max 30 seconds)
const restartDelay = Math.min(baseDelay * Math.pow(2, currentRestarts), 30000);
// Update restart tracking
this.restartCounts.set(workerId, currentRestarts + 1);
this.lastRestartTime.set(workerId, new Date());
Logger.logger.info("cluster", `Restarting worker ${workerId} in ${restartDelay}ms (attempt ${currentRestarts + 1})`);
// Wait for restart delay
await new Promise((resolve) => setTimeout(resolve, restartDelay));
try {
// Fork new worker
const newWorker = cluster__namespace.fork();
const newWorkerId = this.getWorkerId(newWorker);
// Transfer restart history to new worker
this.restartCounts.set(newWorkerId, currentRestarts + 1);
this.lastRestartTime.set(newWorkerId, new Date());
this.emit("worker:restarted", newWorkerId, workerId);
Logger.logger.info("cluster", `Successfully restarted worker ${workerId} as ${newWorkerId}`);
}
catch (error) {
Logger.logger.error("cluster", `Failed to restart worker ${workerId}: ${error.message}`);
const securityError = errorHandler.createSecurityError(`Worker restart failed: ${error.message}`, errorHandler.ErrorType.INTERNAL, errorHandler.ErrorSeverity.HIGH, "WORKER_RESTART_ERROR", { operation: "restart_worker" });
this.errorLogger.logError(securityError);
}
}
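// Worked example: with the default restartDelay of 1000ms, successive restarts of the
// same worker wait 1000, 2000, 4000, 8000, ... ms before forking a replacement,
// capped at 30000ms.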
/**
 * Generate a unique worker ID from the cluster worker id and process PID
*/
getWorkerId(worker) {
return `worker_${worker.id}_${worker.process.pid || "unknown"}`;
}
/**
* Start workers with intelligent batching
*/
async startWorkers(count) {
if (!cluster__namespace.isPrimary && !cluster__namespace.isMaster) {
throw new Error("startWorkers can only be called from the primary process");
}
const workerCount = count || this.workerPool.targetSize;
const batchSize = Math.min(workerCount, 4); // Default batch size
const startDelay = 200; // Default start delay
Logger.logger.info("cluster", `Starting ${workerCount} workers in batches of ${batchSize}`);
for (let i = 0; i < workerCount; i += batchSize) {
const batchPromises = [];
const batchEnd = Math.min(i + batchSize, workerCount);
// Start batch of workers
for (let j = i; j < batchEnd; j++) {
batchPromises.push(this.startSingleWorkerInternal(j + 1, workerCount));
}
// Wait for batch to complete
await Promise.allSettled(batchPromises);
// Delay between batches to avoid system overload
if (batchEnd < workerCount) {
await new Promise((resolve) => setTimeout(resolve, startDelay));
}
}
Logger.logger.info("cluster", `Successfully started ${this.workerPool.currentSize}/${workerCount} workers`);
}
/**
* Internal method to start a single worker with enhanced error handling
*/
async startSingleWorkerInternal(workerNum, totalWorkers) {
try {
const worker = cluster__namespace.fork();
const workerId = this.getWorkerId(worker);
Logger.logger.info("cluster", `Started worker ${workerNum}/${totalWorkers} (ID: ${workerId}, PID: ${worker.process.pid})`);
// Wait for worker to come online with timeout
await this.waitForWorkerOnline(worker, workerId);
}
catch (error) {
const securityError = errorHandler.createSecurityError(`Failed to start worker ${workerNum}: ${error.message}`, errorHandler.ErrorType.INTERNAL, errorHandler.ErrorSeverity.HIGH, "WORKER_START_ERROR", { operation: "start_worker" });
this.errorLogger.logError(securityError);
throw error;
}
}
}
exports.WorkerManager = WorkerManager;
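// Consumption sketch (not executed in this module; the require path, config, and
// message shown are illustrative assumptions, and the awaits belong inside an async
// bootstrap function):
//
//   const { WorkerManager } = require("./WorkerManager.js");
//   const manager = new WorkerManager(config, errorLogger); // primary process only
//   await manager.startWorkers();                 // fork workers in batches
//   manager.broadcastToWorkers({ type: "ping" }); // any serializable message
//   const stats = manager.getWorkerStats();       // aggregated health and request metrics
//   await manager.gracefulShutdown();             // drain, wait, then force-kill stragglers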
//# sourceMappingURL=WorkerManager.js.map