/*
 * cube-ms
 * Version:
 * Production-ready microservice framework with health monitoring, validation, error handling, and Docker Swarm support
 * 454 lines (401 loc) • 13.5 kB
 * JavaScript
 */
import { performance } from 'perf_hooks';
import { config } from '../config/environment.js';
import os from 'os';
/**
 * Performance monitoring and resource management for production environments.
 *
 * Tracks memory usage, a load-average based CPU estimate, request/response
 * timings and connection counts, and logs warnings through the supplied logger
 * when configured thresholds are crossed.
 *
 * Constructing an instance starts several timers (and, unless disabled,
 * attaches `process` event listeners); call {@link PerformanceMonitor#stop}
 * to release all of them.
 */
export class PerformanceMonitor {
  /**
   * @param {object} logger - Logger exposing `info`, `warn`, `error` and `debug` methods.
   * @param {object} [options] - Overrides for the values read from the environment config.
   * @param {number} [options.metricsInterval] - Metrics collection period in ms (default 10000).
   * @param {number} [options.memoryWarningThreshold] - RSS warning threshold in MB (default 256).
   * @param {number} [options.memoryErrorThreshold] - RSS error threshold in MB (default 512).
   * @param {number} [options.cpuWarningThreshold] - CPU warning threshold in % (default 80).
   * @param {number} [options.responseTimeWarningThreshold] - Slow-response threshold in ms (default 1000).
   * @param {number} [options.maxConnections] - Connection limit enforced by the middleware (default 1000).
   * @param {number} [options.maxResponseTimeSamples] - Number of recent response times kept (default 1000).
   * @param {boolean} [options.enableGC] - Set to `false` to disable GC triggering/monitoring.
   * @param {boolean} [options.enableProcessMetrics] - Set to `false` to skip `process` event listeners.
   * @param {boolean} [options.enableResourceLimits] - Set to `false` to disable the connection limit.
   */
  constructor(logger, options = {}) {
    this.logger = logger;

    // Optional chaining keeps the literal fallbacks reachable when a section
    // of the environment config is missing instead of throwing a TypeError.
    this.config = {
      metricsInterval: options.metricsInterval || config?.monitoring?.metrics?.interval || 10000,
      memoryWarningThreshold:
        options.memoryWarningThreshold || config?.performance?.memory?.memoryWarningThreshold || 256, // MB
      memoryErrorThreshold: options.memoryErrorThreshold || 512, // MB
      cpuWarningThreshold: options.cpuWarningThreshold || 80, // %
      responseTimeWarningThreshold: options.responseTimeWarningThreshold || 1000, // ms
      maxConnections: options.maxConnections || config?.performance?.connections?.maxConcurrent || 1000,
      maxResponseTimeSamples: options.maxResponseTimeSamples || 1000,
      enableGC: options.enableGC !== false,
      enableProcessMetrics: options.enableProcessMetrics !== false,
      enableResourceLimits: options.enableResourceLimits !== false
    };

    // Metrics storage
    this.metrics = {
      requests: {
        total: 0,
        active: 0,
        errors: 0,
        responses: new Map() // response time tracking
      },
      memory: {
        current: 0,
        peak: 0,
        warnings: 0,
        lastGC: Date.now()
      },
      cpu: {
        usage: 0,
        loadAverage: [0, 0, 0],
        warnings: 0
      },
      connections: {
        active: 0,
        total: 0,
        rejected: 0
      },
      responseTimes: [],
      startTime: Date.now()
    };

    // Active requests tracking (requestId -> request details)
    this.activeRequests = new Map();
    // Connection tracking
    this.activeConnections = new Set();

    // Handles released by stop()
    this.metricsInterval = null;
    this.memoryInterval = null;
    this.cpuInterval = null;
    this.gcInterval = null;
    this.processListeners = [];

    // Start monitoring
    this.startMonitoring();

    // Setup garbage collection monitoring
    if (this.config.enableGC) {
      this.setupGCMonitoring();
    }

    // Setup process monitoring
    if (this.config.enableProcessMetrics) {
      this.setupProcessMonitoring();
    }
  }

  /**
   * Express middleware for request/response monitoring.
   *
   * Counts every request, rejects with HTTP 503 when the tracked connection
   * count has reached `maxConnections`, and otherwise records the response
   * time and status once the response finishes.
   *
   * @returns {Function} An `(req, res, next)` middleware function.
   */
  middleware() {
    return (req, res, next) => {
      const requestId = req.headers['log-id'] || `req-${Date.now()}-${Math.random()}`;
      const startTime = performance.now();

      this.metrics.requests.total++;

      // Connection limit check. This happens BEFORE the request is marked
      // active: a rejected request never gets finish/close handlers, so
      // tracking it first would leak the active counter and the map entry.
      if (this.config.enableResourceLimits && this.metrics.connections.active >= this.config.maxConnections) {
        this.metrics.connections.rejected++;
        this.logger.warn('Connection limit exceeded', {
          activeConnections: this.metrics.connections.active,
          maxConnections: this.config.maxConnections
        });
        return res.status(503).json({
          error: true,
          message: 'Service temporarily unavailable - too many connections',
          statusCode: 503
        });
      }

      // Track active request
      const entry = {
        startTime,
        path: req.path,
        method: req.method,
        ip: req.socket?.remoteAddress
      };
      this.metrics.requests.active++;
      this.activeRequests.set(requestId, entry);

      // Releases this request exactly once, whichever of finish/close fires
      // first. The per-request flag (rather than a lookup by requestId) keeps
      // the counter correct even when two requests share a client-supplied
      // `log-id`; the identity check avoids deleting the other request's entry.
      let released = false;
      const release = () => {
        if (released) {
          return;
        }
        released = true;
        this.metrics.requests.active--;
        if (this.activeRequests.get(requestId) === entry) {
          this.activeRequests.delete(requestId);
        }
      };

      // Response handling
      res.on('finish', () => {
        const responseTime = performance.now() - startTime;

        // Update metrics
        release();

        // Track response time, keeping only the most recent samples
        const samples = this.metrics.responseTimes;
        samples.push(responseTime);
        const overflow = samples.length - this.config.maxResponseTimeSamples;
        if (overflow > 0) {
          samples.splice(0, overflow);
        }

        // Track errors
        if (res.statusCode >= 400) {
          this.metrics.requests.errors++;
        }

        // Log slow responses
        if (responseTime > this.config.responseTimeWarningThreshold) {
          this.logger.warn('Slow response detected', {
            requestId,
            responseTime: Math.round(responseTime),
            path: req.path,
            method: req.method,
            statusCode: res.statusCode
          });
        }

        // Log performance metrics
        if (req.logger) {
          req.logger.debug('Request completed', {
            requestId,
            responseTime: Math.round(responseTime),
            statusCode: res.statusCode,
            activeRequests: this.metrics.requests.active
          });
        }
      });

      // Handle connection close (covers requests aborted before finishing)
      res.on('close', () => {
        release();
      });

      next();
    };
  }

  /**
   * Track HTTP connections.
   *
   * @param {object} socket - Socket-like emitter; its `close` event ends the tracking.
   */
  trackConnection(socket) {
    // Tracking the same socket twice would double-count it and attach a
    // second close listener.
    if (this.activeConnections.has(socket)) {
      return;
    }
    this.activeConnections.add(socket);
    this.metrics.connections.active++;
    this.metrics.connections.total++;
    socket.on('close', () => {
      // delete() reports whether the socket was still tracked, so a repeated
      // close event cannot push the counter below its true value.
      if (this.activeConnections.delete(socket)) {
        this.metrics.connections.active--;
      }
    });
  }

  /**
   * Start monitoring intervals (metrics, memory and CPU).
   * Any timers from a previous call are cleared first so they cannot leak.
   */
  startMonitoring() {
    if (this.metricsInterval) {
      clearInterval(this.metricsInterval);
    }
    if (this.memoryInterval) {
      clearInterval(this.memoryInterval);
    }
    if (this.cpuInterval) {
      clearInterval(this.cpuInterval);
    }

    // Main metrics collection
    this.metricsInterval = setInterval(() => {
      this.collectMetrics();
    }, this.config.metricsInterval);

    // Memory monitoring (more frequent)
    this.memoryInterval = setInterval(() => {
      this.checkMemoryUsage();
    }, 5000);

    // CPU monitoring
    this.cpuInterval = setInterval(() => {
      this.checkCPUUsage();
    }, 10000);
  }

  /**
   * Collect comprehensive metrics, log them at info level and return them.
   *
   * @returns {object} Snapshot with `timestamp`, `uptime` (seconds), `memory` (MB),
   *   `cpu`, `requests`, `responseTimes` (ms) and `connections` sections.
   */
  collectMetrics() {
    const memUsage = process.memoryUsage();
    const loadAvg = os.loadavg();
    const toMB = (bytes) => Math.round(bytes / 1024 / 1024);
    const rssMB = toMB(memUsage.rss);

    // Update metrics
    this.metrics.memory.current = rssMB;
    this.metrics.memory.peak = Math.max(this.metrics.memory.peak, rssMB);
    this.metrics.cpu.loadAverage = loadAvg;

    // Calculate response time percentiles on a sorted copy
    const responseTimes = [...this.metrics.responseTimes].sort((a, b) => a - b);
    const sampleCount = responseTimes.length;
    const { total, active, errors } = this.metrics.requests;

    const metrics = {
      timestamp: new Date().toISOString(),
      uptime: Math.round((Date.now() - this.metrics.startTime) / 1000),
      memory: {
        rss: rssMB,
        heapUsed: toMB(memUsage.heapUsed),
        heapTotal: toMB(memUsage.heapTotal),
        external: toMB(memUsage.external),
        heapUsagePercent: memUsage.heapTotal > 0
          ? Math.round((memUsage.heapUsed / memUsage.heapTotal) * 100)
          : 0
      },
      cpu: {
        loadAverage: loadAvg,
        usage: this.metrics.cpu.usage
      },
      requests: {
        total,
        active,
        errors,
        errorRate: total > 0 ? Math.round((errors / total) * 100) : 0
      },
      responseTimes: {
        count: sampleCount,
        mean: sampleCount > 0
          ? Math.round(responseTimes.reduce((sum, value) => sum + value, 0) / sampleCount)
          : 0,
        p50: Math.round(this.percentile(responseTimes, 50)),
        p95: Math.round(this.percentile(responseTimes, 95)),
        p99: Math.round(this.percentile(responseTimes, 99))
      },
      connections: {
        active: this.metrics.connections.active,
        total: this.metrics.connections.total,
        rejected: this.metrics.connections.rejected
      }
    };

    // Log metrics
    this.logger.info('Performance metrics', metrics);
    return metrics;
  }

  /**
   * Check memory usage and trigger warnings/cleanup.
   *
   * Logs a warning above `memoryWarningThreshold` (and runs `global.gc()` when
   * GC is enabled and exposed) and an error above `memoryErrorThreshold`.
   */
  checkMemoryUsage() {
    const memUsage = process.memoryUsage();
    const currentMB = Math.round(memUsage.rss / 1024 / 1024);
    this.metrics.memory.current = currentMB;
    // This check runs more often than collectMetrics(), so record the peak here too.
    this.metrics.memory.peak = Math.max(this.metrics.memory.peak, currentMB);

    // Warning threshold
    if (currentMB > this.config.memoryWarningThreshold) {
      this.metrics.memory.warnings++;
      this.logger.warn('High memory usage detected', {
        currentMemory: currentMB,
        threshold: this.config.memoryWarningThreshold,
        heapUsed: Math.round(memUsage.heapUsed / 1024 / 1024),
        heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024)
      });

      // Trigger garbage collection if enabled
      if (this.config.enableGC && global.gc) {
        try {
          const beforeGC = process.memoryUsage();
          global.gc();
          const afterGC = process.memoryUsage();
          this.metrics.memory.lastGC = Date.now();
          this.logger.info('Garbage collection triggered', {
            beforeGC: Math.round(beforeGC.rss / 1024 / 1024),
            afterGC: Math.round(afterGC.rss / 1024 / 1024),
            freed: Math.round((beforeGC.rss - afterGC.rss) / 1024 / 1024)
          });
        } catch (error) {
          this.logger.error('Garbage collection failed', { error: error.message });
        }
      }
    }

    // Error threshold
    if (currentMB > this.config.memoryErrorThreshold) {
      this.logger.error('Critical memory usage detected', {
        currentMemory: currentMB,
        threshold: this.config.memoryErrorThreshold,
        action: 'Consider restarting service'
      });
    }
  }

  /**
   * Check CPU usage.
   *
   * The value is an estimate: the 1-minute load average divided by the number
   * of CPUs, expressed as a percentage.
   */
  checkCPUUsage() {
    const loadAvg = os.loadavg();
    // os.cpus() can return an empty array; clamp to 1 to avoid dividing by zero.
    const cpuCount = Math.max(1, os.cpus().length);

    // Calculate CPU usage as percentage of available cores
    const cpuUsage = Math.round((loadAvg[0] / cpuCount) * 100);
    this.metrics.cpu.usage = cpuUsage;
    this.metrics.cpu.loadAverage = loadAvg;

    if (cpuUsage > this.config.cpuWarningThreshold) {
      this.metrics.cpu.warnings++;
      this.logger.warn('High CPU usage detected', {
        cpuUsage,
        threshold: this.config.cpuWarningThreshold,
        loadAverage: loadAvg,
        cpuCount
      });
    }
  }

  /**
   * Setup garbage collection monitoring.
   *
   * Every 30 seconds, runs `global.gc()` when heap usage exceeds 80% and the
   * last collection was more than 30 seconds ago. Does nothing (beyond a
   * warning) when `global.gc` is not exposed.
   */
  setupGCMonitoring() {
    if (!global.gc) {
      this.logger.warn('Garbage collection monitoring not available (run with --expose-gc)');
      return;
    }

    if (this.gcInterval) {
      clearInterval(this.gcInterval);
    }

    // Periodic GC if memory is high. The handle is kept so stop() can clear it.
    this.gcInterval = setInterval(() => {
      const memUsage = process.memoryUsage();
      const heapUsagePercent = (memUsage.heapUsed / memUsage.heapTotal) * 100;
      if (heapUsagePercent > 80 && Date.now() - this.metrics.memory.lastGC > 30000) {
        try {
          global.gc();
          this.metrics.memory.lastGC = Date.now();
          this.logger.debug('Automatic garbage collection triggered', {
            heapUsagePercent: Math.round(heapUsagePercent)
          });
        } catch (error) {
          this.logger.error('Automatic garbage collection failed', { error: error.message });
        }
      }
    }, 30000); // Check every 30 seconds
  }

  /**
   * Setup process monitoring.
   *
   * Attaches logging listeners for `uncaughtException`, `unhandledRejection`
   * and `warning`. The listeners are remembered so stop() can detach them.
   *
   * NOTE(review): per the Node.js documentation, attaching an
   * `uncaughtException` listener overrides the default print-and-exit
   * behaviour, so the process keeps running after an uncaught exception
   * unless another listener terminates it. Confirm that is intended.
   */
  setupProcessMonitoring() {
    // Already attached for this instance; avoid stacking duplicate listeners.
    if (this.processListeners.length > 0) {
      return;
    }

    const listeners = [
      // Monitor uncaught exceptions
      ['uncaughtException', (error) => {
        this.logger.error('Uncaught exception detected', {
          error: error.message,
          stack: error.stack,
          uptime: process.uptime(),
          memoryUsage: process.memoryUsage()
        });
      }],
      // Monitor unhandled rejections
      ['unhandledRejection', (reason) => {
        this.logger.error('Unhandled promise rejection detected', {
          reason: reason?.message || reason,
          stack: reason?.stack,
          uptime: process.uptime(),
          memoryUsage: process.memoryUsage()
        });
      }],
      // Monitor process warnings
      ['warning', (warning) => {
        this.logger.warn('Process warning', {
          name: warning.name,
          message: warning.message,
          stack: warning.stack
        });
      }]
    ];

    for (const [event, handler] of listeners) {
      process.on(event, handler);
    }
    this.processListeners = listeners;
  }

  /**
   * Calculate percentile using the nearest-rank method.
   *
   * @param {number[]} sortedArray - Values sorted in ascending order.
   * @param {number} p - Percentile to read, 0-100.
   * @returns {number} The selected value, or 0 for an empty array.
   */
  percentile(sortedArray, p) {
    if (sortedArray.length === 0) return 0;
    const index = Math.ceil((p / 100) * sortedArray.length) - 1;
    // Clamp so p = 0 and p > 100 still select a valid element.
    return sortedArray[Math.max(0, Math.min(index, sortedArray.length - 1))];
  }

  /**
   * Get current performance status.
   *
   * @returns {object} `healthy` flag, `uptime` in seconds, and current
   *   `memory`, `cpu`, `requests` and `connections` counters.
   */
  getStatus() {
    const memUsage = process.memoryUsage();
    return {
      healthy: this.isHealthy(),
      uptime: Math.round((Date.now() - this.metrics.startTime) / 1000),
      memory: {
        current: Math.round(memUsage.rss / 1024 / 1024),
        peak: this.metrics.memory.peak,
        warnings: this.metrics.memory.warnings
      },
      cpu: {
        usage: this.metrics.cpu.usage,
        warnings: this.metrics.cpu.warnings
      },
      requests: {
        active: this.metrics.requests.active,
        total: this.metrics.requests.total,
        errors: this.metrics.requests.errors
      },
      connections: {
        active: this.metrics.connections.active,
        rejected: this.metrics.connections.rejected
      }
    };
  }

  /**
   * Check if service is healthy.
   *
   * @returns {boolean} `true` while RSS is below `memoryErrorThreshold`, the
   *   last CPU estimate is below 95% and active requests are below `maxConnections`.
   */
  isHealthy() {
    const memUsage = process.memoryUsage();
    const currentMB = Math.round(memUsage.rss / 1024 / 1024);
    return currentMB < this.config.memoryErrorThreshold &&
      this.metrics.cpu.usage < 95 &&
      this.metrics.requests.active < this.config.maxConnections;
  }

  /**
   * Stop monitoring.
   *
   * Clears every timer this instance started (including the GC timer) and
   * detaches its `process` listeners. Safe to call more than once.
   */
  stop() {
    for (const handleName of ['metricsInterval', 'memoryInterval', 'cpuInterval', 'gcInterval']) {
      if (this[handleName]) {
        clearInterval(this[handleName]);
        this[handleName] = null;
      }
    }

    for (const [event, handler] of this.processListeners || []) {
      process.removeListener(event, handler);
    }
    this.processListeners = [];
  }
}
export default PerformanceMonitor;