UNPKG

ruv-swarm

Version:

High-performance neural network swarm orchestration in WebAssembly

533 lines (462 loc) 13.5 kB
/**
 * Diagnostic utilities for ruv-swarm
 * Helps debug connection issues and performance problems
 */

import { Logger } from './logger.js';
import { loggingConfig } from './logging-config.js';
import fs from 'fs';
import path from 'path';
import { performance } from 'perf_hooks';

/** Number of bytes in one mebibyte; used for all MB conversions below. */
const BYTES_PER_MB = 1024 * 1024;

/** Threshold (ms) applied to operations that have no entry in `thresholds`. */
const DEFAULT_OPERATION_THRESHOLD_MS = 1000;

/**
 * Count items grouped by a derived key.
 *
 * Counting happens in a Map so that keys such as "constructor" or "__proto__"
 * cannot collide with inherited Object.prototype members; the result is
 * converted to a plain object so it serialises as before.
 *
 * @param {Array<*>} items - Items to count.
 * @param {function(*): *} keyOf - Returns the grouping key for an item.
 * @returns {Object<string, number>} Occurrences per (stringified) key.
 */
function countBy(items, keyOf) {
  const counts = new Map();
  for (const item of items) {
    const key = String(keyOf(item));
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  return Object.fromEntries(counts);
}

/**
 * Derive a grouping label for a recorded failure.
 *
 * Uses `details.error.message` when present, the error itself when it is a
 * non-empty string, and 'Unknown' otherwise (including when `details` is
 * null or undefined).
 *
 * @param {Object} failure - A history entry whose event is 'failed'.
 * @returns {*} The label to group this failure under.
 */
function describeFailure(failure) {
  const error = failure.details?.error;
  if (typeof error === 'string') {
    return error || 'Unknown';
  }
  return error?.message || 'Unknown';
}

/**
 * Connection diagnostics
 */
export class ConnectionDiagnostics {
  /**
   * @param {Object|null} [logger=null] - Logger to use; when falsy, one is
   *   obtained from `loggingConfig.getLogger('diagnostics', ...)`.
   */
  constructor(logger = null) {
    this.logger = logger || loggingConfig.getLogger('diagnostics', { level: 'DEBUG' });
    this.connectionHistory = [];
    this.maxHistorySize = 100;
    this.activeConnections = new Map();
  }

  /**
   * Record connection event
   *
   * Appends an entry to the bounded history, tracks 'established'
   * connections as active, and on 'closed' / 'failed' attaches the elapsed
   * `duration` (ms) and drops the connection from the active set.
   *
   * @param {*} connectionId - Identifier of the connection.
   * @param {string} event - Event name, e.g. 'established', 'closed', 'failed'.
   * @param {Object} [details={}] - Extra data stored with the entry.
   * @returns {Object} The recorded history entry.
   */
  recordEvent(connectionId, event, details = {}) {
    const timestamp = new Date().toISOString();
    const entry = {
      connectionId,
      event,
      timestamp,
      details,
      memoryUsage: process.memoryUsage(),
      cpuUsage: process.cpuUsage(),
    };

    this.connectionHistory.push(entry);
    if (this.connectionHistory.length > this.maxHistorySize) {
      // Drop the oldest entry so the history never exceeds maxHistorySize.
      this.connectionHistory.shift();
    }

    // Track active connections
    if (event === 'established') {
      this.activeConnections.set(connectionId, {
        startTime: Date.now(),
        ...details,
      });
    } else if (event === 'closed' || event === 'failed') {
      const conn = this.activeConnections.get(connectionId);
      if (conn) {
        // `entry` is the same object already stored in the history, so the
        // duration shows up there as well.
        entry.duration = Date.now() - conn.startTime;
        this.activeConnections.delete(connectionId);
      }
    }

    this.logger.debug('Connection event recorded', entry);
    return entry;
  }

  /**
   * Get connection summary
   *
   * @returns {{totalEvents: number, eventCounts: Object<string, number>,
   *   activeConnections: number, recentFailures: Array<Object>,
   *   failureRate: number}} Summary of the recorded history. `failureRate`
   *   is 0 when no events have been recorded.
   */
  getConnectionSummary() {
    const totalEvents = this.connectionHistory.length;
    const failures = this.connectionHistory.filter((entry) => entry.event === 'failed');

    return {
      totalEvents,
      eventCounts: countBy(this.connectionHistory, (entry) => entry.event),
      activeConnections: this.activeConnections.size,
      recentFailures: failures.slice(-10),
      // Guard against 0 / 0 (NaN) on an empty history.
      failureRate: totalEvents === 0 ? 0 : failures.length / totalEvents,
    };
  }

  /**
   * Analyze connection patterns
   *
   * @returns {{errorTypes: Object<string, number>, hourlyFailures: number[],
   *   memoryAtFailure: Array<Object>, avgMemoryAtFailure: number}} Failure
   *   counts per error label, failures per local hour of day (24 slots),
   *   heap/external memory in MB at each failure, and the mean heap usage in
   *   MB (0 when there are no failures).
   */
  analyzePatterns() {
    const failures = this.connectionHistory.filter((entry) => entry.event === 'failed');

    // Group failures by error type
    const errorTypes = countBy(failures, describeFailure);

    // Find time patterns
    const hourlyFailures = new Array(24).fill(0);
    for (const failure of failures) {
      const hour = new Date(failure.timestamp).getHours();
      hourlyFailures[hour]++;
    }

    // Memory patterns at failure time
    const memoryAtFailure = failures.map((failure) => ({
      timestamp: failure.timestamp,
      heapUsed: failure.memoryUsage.heapUsed / BYTES_PER_MB, // MB
      external: failure.memoryUsage.external / BYTES_PER_MB, // MB
    }));

    const totalHeapUsed = memoryAtFailure.reduce((sum, sample) => sum + sample.heapUsed, 0);

    return {
      errorTypes,
      hourlyFailures,
      memoryAtFailure,
      // Guard against 0 / 0 (NaN) when nothing has failed.
      avgMemoryAtFailure: memoryAtFailure.length === 0
        ? 0
        : totalHeapUsed / memoryAtFailure.length,
    };
  }

  /**
   * Generate diagnostic report
   *
   * @returns {Object} Report containing a timestamp, process/system info,
   *   the connection summary, the failure patterns and recommendations.
   */
  generateReport() {
    const summary = this.getConnectionSummary();
    const patterns = this.analyzePatterns();
    const systemInfo = {
      platform: process.platform,
      nodeVersion: process.version,
      uptime: process.uptime(),
      memoryUsage: process.memoryUsage(),
      cpuUsage: process.cpuUsage(),
    };

    const report = {
      timestamp: new Date().toISOString(),
      system: systemInfo,
      connections: summary,
      patterns,
      recommendations: this.generateRecommendations(summary, patterns),
    };

    this.logger.info('Diagnostic report generated', {
      failureRate: summary.failureRate,
      activeConnections: summary.activeConnections,
    });

    return report;
  }

  /**
   * Generate recommendations based on patterns
   *
   * @param {Object} summary - Result of `getConnectionSummary()`.
   * @param {Object} patterns - Result of `analyzePatterns()`.
   * @returns {Array<{severity: string, issue: string, suggestion: string}>}
   *   One entry per detected problem; empty when nothing stands out.
   */
  generateRecommendations(summary, patterns) {
    const recommendations = [];

    // High failure rate
    if (summary.failureRate > 0.1) {
      recommendations.push({
        severity: 'high',
        issue: 'High connection failure rate',
        suggestion: 'Check network stability and MCP server configuration',
      });
    }

    // Memory issues (avgMemoryAtFailure is expressed in MB)
    if (patterns.avgMemoryAtFailure > 500) {
      recommendations.push({
        severity: 'medium',
        issue: 'High memory usage during failures',
        suggestion: 'Consider increasing memory limits or optimizing memory usage',
      });
    }

    // Specific error patterns
    for (const [error, count] of Object.entries(patterns.errorTypes)) {
      if (count > 5) {
        recommendations.push({
          severity: 'medium',
          issue: `Recurring error: ${error}`,
          suggestion: `Investigate root cause of: ${error}`,
        });
      }
    }

    return recommendations;
  }
}

/**
 * Performance diagnostics
 */
export class PerformanceDiagnostics {
  /**
   * @param {Object|null} [logger=null] - Logger to use; when falsy, one is
   *   obtained from `loggingConfig.getLogger('diagnostics', ...)`.
   */
  constructor(logger = null) {
    this.logger = logger || loggingConfig.getLogger('diagnostics', { level: 'DEBUG' });
    this.operations = new Map();
    // Bounded history of finished operations, read by getSlowOperations().
    this.completedOperations = [];
    this.maxCompletedOperations = 100;
    this.thresholds = {
      'swarm_init': 1000, // 1 second
      'agent_spawn': 500, // 500ms
      'task_orchestrate': 2000, // 2 seconds
      'neural_train': 5000, // 5 seconds
    };
  }

  /**
   * Start tracking an operation
   *
   * @param {string} name - Operation name; also the key looked up in
   *   `thresholds` when the operation ends.
   * @param {Object} [metadata={}] - Arbitrary data kept with the operation.
   * @returns {string} Identifier to pass to `endOperation()`.
   */
  startOperation(name, metadata = {}) {
    // slice(2, 11) yields the same nine characters the deprecated
    // substr(2, 9) did.
    const id = `${name}-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
    this.operations.set(id, {
      name,
      startTime: performance.now(),
      startMemory: process.memoryUsage(),
      metadata,
    });
    return id;
  }

  /**
   * End tracking an operation
   *
   * @param {string} id - Identifier returned by `startOperation()`.
   * @param {boolean} [success=true] - Whether the operation succeeded.
   * @returns {Object|null} The operation record extended with `endTime`,
   *   `duration` (ms), `success`, `memoryDelta` and `aboveThreshold`, or
   *   null when `id` is not being tracked.
   */
  endOperation(id, success = true) {
    const operation = this.operations.get(id);
    if (!operation) {
      return null;
    }

    const endTime = performance.now();
    const duration = endTime - operation.startTime;
    const endMemory = process.memoryUsage();

    // Only a numeric entry counts as configured; this ignores inherited
    // properties (e.g. an operation named "constructor") and lets the log
    // below report the threshold that was actually applied.
    const configured = this.thresholds[operation.name];
    const threshold = typeof configured === 'number'
      ? configured
      : DEFAULT_OPERATION_THRESHOLD_MS;

    const result = {
      ...operation,
      endTime,
      duration,
      success,
      memoryDelta: {
        heapUsed: endMemory.heapUsed - operation.startMemory.heapUsed,
        external: endMemory.external - operation.startMemory.external,
      },
      aboveThreshold: duration > threshold,
    };

    this.operations.delete(id);

    this.completedOperations.push(result);
    if (this.completedOperations.length > this.maxCompletedOperations) {
      // Drop the oldest record so the history stays bounded.
      this.completedOperations.shift();
    }

    if (result.aboveThreshold) {
      this.logger.warn('Operation exceeded threshold', {
        operation: operation.name,
        duration,
        threshold,
      });
    }

    return result;
  }

  /**
   * Get slow operations
   *
   * @param {number} [limit=10] - Maximum number of operations to return.
   * @returns {Array<Object>} Retained completed operations that exceeded
   *   their threshold, slowest first.
   */
  getSlowOperations(limit = 10) {
    // filter() returns a new array, so sorting does not disturb the history.
    return this.completedOperations
      .filter((op) => op.aboveThreshold)
      .sort((a, b) => b.duration - a.duration)
      .slice(0, limit);
  }
}

/**
 * System diagnostics
 */
export class SystemDiagnostics {
  /**
   * @param {Object|null} [logger=null] - Logger to use; when falsy, one is
   *   obtained from `loggingConfig.getLogger('diagnostics', ...)`.
   */
  constructor(logger = null) {
    this.logger = logger || loggingConfig.getLogger('diagnostics', { level: 'DEBUG' });
    this.samples = [];
    this.maxSamples = 60; // 1 minute of samples at 1Hz
    this.monitorInterval = null;
  }

  /**
   * Collect system sample
   *
   * @returns {{timestamp: number, memory: Object, cpu: Object,
   *   handles: number, requests: number}} The sample that was stored.
   */
  collectSample() {
    const sample = {
      timestamp: Date.now(),
      memory: process.memoryUsage(),
      cpu: process.cpuUsage(),
      // These process internals may be absent; fall back to 0 in that case.
      handles: process._getActiveHandles?.().length ?? 0,
      requests: process._getActiveRequests?.().length ?? 0,
    };

    this.samples.push(sample);
    if (this.samples.length > this.maxSamples) {
      this.samples.shift();
    }

    return sample;
  }

  /**
   * Start monitoring
   *
   * Stops any monitor that is already running, then samples the process
   * every `interval` milliseconds and warns on high heap usage or a high
   * number of active handles.
   *
   * @param {number} [interval=1000] - Sampling period in milliseconds.
   */
  startMonitoring(interval = 1000) {
    if (this.monitorInterval) {
      this.stopMonitoring();
    }

    this.monitorInterval = setInterval(() => {
      const sample = this.collectSample();

      // Check for anomalies
      if (sample.memory.heapUsed > 500 * BYTES_PER_MB) { // 500MB
        this.logger.warn('High memory usage detected', {
          heapUsed: `${(sample.memory.heapUsed / 1024 / 1024).toFixed(2)} MB`,
        });
      }

      if (sample.handles > 100) {
        this.logger.warn('High number of active handles', {
          handles: sample.handles,
        });
      }
    }, interval);

    this.logger.info('System monitoring started', { interval });
  }

  /**
   * Stop monitoring
   *
   * Does nothing when no monitor is running.
   */
  stopMonitoring() {
    if (this.monitorInterval) {
      clearInterval(this.monitorInterval);
      this.monitorInterval = null;
      this.logger.info('System monitoring stopped');
    }
  }

  /**
   * Get system health
   *
   * @returns {Object} `{ status: 'unknown', message }` when no samples
   *   exist; otherwise `{ status, issues, metrics }` where status is
   *   'healthy' or 'warning'.
   */
  getSystemHealth() {
    if (this.samples.length === 0) {
      return { status: 'unknown', message: 'No samples collected' };
    }

    const latest = this.samples[this.samples.length - 1];
    const totalHeapUsed = this.samples.reduce((sum, sample) => sum + sample.memory.heapUsed, 0);
    const avgMemory = totalHeapUsed / this.samples.length;

    const issues = [];

    if (latest.memory.heapUsed > 400 * BYTES_PER_MB) {
      issues.push('High memory usage');
    }

    if (latest.handles > 50) {
      issues.push('Many active handles');
    }

    if (avgMemory > 300 * BYTES_PER_MB) {
      issues.push('Sustained high memory usage');
    }

    return {
      // Any detected issue downgrades the status to 'warning'.
      status: issues.length > 0 ? 'warning' : 'healthy',
      issues,
      metrics: {
        currentMemory: `${(latest.memory.heapUsed / 1024 / 1024).toFixed(2)} MB`,
        avgMemory: `${(avgMemory / 1024 / 1024).toFixed(2)} MB`,
        handles: latest.handles,
        requests: latest.requests,
      },
    };
  }
}

/**
 * Main diagnostics manager
 */
export class DiagnosticsManager {
  /**
   * Creates the connection, performance and system diagnostics, all sharing
   * one logger obtained from `loggingConfig`.
   */
  constructor() {
    this.logger = loggingConfig.getLogger('diagnostics', { level: 'DEBUG' });
    this.connection = new ConnectionDiagnostics(this.logger);
    this.performance = new PerformanceDiagnostics(this.logger);
    this.system = new SystemDiagnostics(this.logger);
  }

  /**
   * Enable all diagnostics
   */
  enableAll() {
    this.system.startMonitoring();
    this.logger.info('All diagnostics enabled');
  }

  /**
   * Disable all diagnostics
   */
  disableAll() {
    this.system.stopMonitoring();
    this.logger.info('All diagnostics disabled');
  }

  /**
   * Generate full diagnostic report
   *
   * @param {string|null} [outputPath=null] - When given, the report is also
   *   written there as pretty-printed JSON; missing parent directories are
   *   created.
   * @returns {Promise<Object>} The combined report.
   */
  async generateFullReport(outputPath = null) {
    const report = {
      timestamp: new Date().toISOString(),
      connection: this.connection.generateReport(),
      performance: {
        slowOperations: this.performance.getSlowOperations(),
      },
      system: this.system.getSystemHealth(),
      logs: await this.collectRecentLogs(),
    };

    if (outputPath) {
      const reportPath = path.resolve(outputPath);
      // Create the target directory first so a fresh path does not fail
      // with ENOENT (same approach as testFileSystem()).
      fs.mkdirSync(path.dirname(reportPath), { recursive: true });
      fs.writeFileSync(reportPath, JSON.stringify(report, null, 2));
      this.logger.info('Diagnostic report saved', { path: reportPath });
    }

    return report;
  }

  /**
   * Collect recent logs
   *
   * @returns {Promise<{message: string, logsEnabled: boolean}>} Placeholder
   *   payload; `logsEnabled` reflects whether LOG_TO_FILE is 'true'.
   */
  async collectRecentLogs() {
    // This would read from log files if file logging is enabled
    // For now, return a placeholder
    return {
      message: 'Log collection would read from log files',
      logsEnabled: process.env.LOG_TO_FILE === 'true',
    };
  }

  /**
   * Run diagnostic tests
   *
   * Runs the memory, file system and WASM checks one after another.
   *
   * @returns {Promise<{timestamp: string, tests: Array<Object>,
   *   summary: {total: number, passed: number, failed: number}}>}
   */
  async runDiagnosticTests() {
    const tests = [];

    // Test 1: Memory allocation
    tests.push(await this.testMemoryAllocation());

    // Test 2: File system access
    tests.push(await this.testFileSystem());

    // Test 3: WASM loading
    tests.push(await this.testWasmLoading());

    const passed = tests.filter((test) => test.success).length;

    return {
      timestamp: new Date().toISOString(),
      tests,
      summary: {
        total: tests.length,
        passed,
        failed: tests.length - passed,
      },
    };
  }

  /**
   * Allocate a one-million-element array and report the heap growth.
   *
   * @returns {Promise<Object>} Test result with `name`, `success` and either
   *   `allocated` (heap delta in MB) or `error`.
   */
  async testMemoryAllocation() {
    try {
      const start = process.memoryUsage().heapUsed;
      const testArray = new Array(1000000).fill(0);
      const end = process.memoryUsage().heapUsed;
      // Touch the array after sampling so the local is not unused.
      void testArray.length;

      return {
        name: 'Memory Allocation',
        success: true,
        allocated: `${((end - start) / 1024 / 1024).toFixed(2)} MB`,
      };
    } catch (error) {
      return {
        name: 'Memory Allocation',
        success: false,
        error: error.message,
      };
    }
  }

  /**
   * Check that a file can be created and removed under `<cwd>/logs`.
   *
   * @returns {Promise<Object>} Test result with `name`, `success` and either
   *   `path` or `error`.
   */
  async testFileSystem() {
    try {
      const testPath = path.join(process.cwd(), 'logs', '.diagnostic-test');
      fs.mkdirSync(path.dirname(testPath), { recursive: true });
      fs.writeFileSync(testPath, 'test');
      fs.unlinkSync(testPath);

      return {
        name: 'File System Access',
        success: true,
        path: testPath,
      };
    } catch (error) {
      return {
        name: 'File System Access',
        success: false,
        error: error.message,
      };
    }
  }

  /**
   * Check whether the WASM binary exists under `<cwd>/wasm`.
   *
   * Only the file's existence is checked; the module is not loaded.
   *
   * @returns {Promise<Object>} Test result with `name`, `success` and either
   *   `path` / `exists` or `error`.
   */
  async testWasmLoading() {
    try {
      // Test if WASM module can be loaded
      const wasmPath = path.join(process.cwd(), 'wasm', 'ruv_swarm_wasm_bg.wasm');
      const exists = fs.existsSync(wasmPath);

      return {
        name: 'WASM Module Check',
        success: exists,
        path: wasmPath,
        exists,
      };
    } catch (error) {
      return {
        name: 'WASM Module Check',
        success: false,
        error: error.message,
      };
    }
  }
}

// Singleton instance
export const diagnostics = new DiagnosticsManager();

export default diagnostics;