UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

688 lines 33.4 kB
/** * System MCP Tools for CLI * * V2 Compatibility - System monitoring tools: status, metrics, health * * ✅ Uses REAL system metrics via Node.js APIs: * - process.memoryUsage() for real memory stats * - process.cpuUsage() for real CPU stats * - os module for system information */ import { getProjectCwd } from './types.js'; import { validateIdentifier } from './validate-input.js'; import { existsSync, readFileSync, writeFileSync, mkdirSync, statfsSync } from 'node:fs'; import { join, dirname } from 'node:path'; import { fileURLToPath } from 'node:url'; import * as os from 'node:os'; import * as dns from 'node:dns'; // Read version dynamically from package.json function getPackageVersion() { try { const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); for (const depth of ['../..', '../../..']) { const pkgPath = join(__dirname, depth, 'package.json'); if (existsSync(pkgPath)) { const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8')); if (pkg.name?.includes('claude-flow') || pkg.name === 'ruflo') { return pkg.version || '3.0.0'; } } } return '3.0.0'; } catch { return '3.0.0'; } } const PKG_VERSION = getPackageVersion(); // Storage paths const STORAGE_DIR = '.claude-flow'; const SYSTEM_DIR = 'system'; const METRICS_FILE = 'metrics.json'; function getSystemDir() { return join(getProjectCwd(), STORAGE_DIR, SYSTEM_DIR); } function getMetricsPath() { return join(getSystemDir(), METRICS_FILE); } function ensureSystemDir() { const dir = getSystemDir(); if (!existsSync(dir)) { mkdirSync(dir, { recursive: true }); } } function loadMetrics() { try { const path = getMetricsPath(); if (existsSync(path)) { return JSON.parse(readFileSync(path, 'utf-8')); } } catch { // Return default metrics } return { startTime: new Date().toISOString(), lastCheck: new Date().toISOString(), uptime: 0, health: 1.0, cpu: os.loadavg()[0] * 100 / os.cpus().length, memory: { used: Math.round((os.totalmem() - os.freemem()) / 1024 / 1024), total: Math.round(os.totalmem() / 1024 / 1024) }, agents: { active: 0, total: 0 }, tasks: { pending: 0, completed: 0, failed: 0 }, requests: { total: 0, success: 0, errors: 0 }, }; } function saveMetrics(metrics) { ensureSystemDir(); metrics.lastCheck = new Date().toISOString(); writeFileSync(getMetricsPath(), JSON.stringify(metrics, null, 2), 'utf-8'); } export const systemTools = [ { name: 'system_status', description: 'Get overall system status Use when native Bash is wrong because you need Ruflo runtime metrics (HNSW index size, ReasoningBank state, swarm health, breaker status) — those are not in /proc, only in the running daemon. For OS-level info (uptime, disk, mem), native Bash + standard tools are fine.', category: 'system', inputSchema: { type: 'object', properties: { verbose: { type: 'boolean', description: 'Include detailed information' }, components: { type: 'array', items: { type: 'string' }, description: 'Specific components to check' }, }, }, handler: async (input) => { const metrics = loadMetrics(); // #2235(B) — live process uptime, not the persisted metrics.startTime // (which is the file's creation timestamp and survived across restarts, // making system_status report stale ~8.8-day uptime on a fresh server). const uptime = Math.floor(process.uptime() * 1000); const status = { status: metrics.health >= 0.8 ? 'healthy' : metrics.health >= 0.5 ? 'degraded' : 'unhealthy', uptime, uptimeFormatted: `${Math.floor(uptime / 3600000)}h ${Math.floor((uptime % 3600000) / 60000)}m`, version: PKG_VERSION, components: { swarm: { status: 'running', health: metrics.health }, memory: { status: 'unknown', _note: 'Health not measured — use system_health for real checks' }, neural: { status: 'unknown', _note: 'Health not measured — use system_health for real checks' }, mcp: { status: 'unknown', _note: 'Health not measured — use system_health for real checks' }, }, lastCheck: new Date().toISOString(), }; if (input.verbose) { return { ...status, metrics: { cpu: metrics.cpu, memory: metrics.memory, agents: metrics.agents, tasks: metrics.tasks, }, }; } return status; }, }, { name: 'system_metrics', description: 'Get system metrics and performance data Use when native Bash is wrong because you need Ruflo runtime metrics (HNSW index size, ReasoningBank state, swarm health, breaker status) — those are not in /proc, only in the running daemon. For OS-level info (uptime, disk, mem), native Bash + standard tools are fine.', category: 'system', inputSchema: { type: 'object', properties: { category: { type: 'string', enum: ['all', 'cpu', 'memory', 'agents', 'tasks', 'requests'], description: 'Metrics category' }, timeRange: { type: 'string', description: 'Time range (e.g., 1h, 24h, 7d)' }, format: { type: 'string', enum: ['json', 'table', 'summary'], description: 'Output format' }, }, }, handler: async (input) => { const store = loadMetrics(); const category = input.category || 'all'; // Get REAL system metrics via Node.js APIs const memUsage = process.memoryUsage(); const loadAvg = os.loadavg(); const cpus = os.cpus(); const totalMem = os.totalmem(); const freeMem = os.freemem(); // Read real agent/task counts — try AgentDB first, fallback to JSON stores let agentCounts = { active: 0, total: 0 }; let taskCounts = { pending: 0, completed: 0, failed: 0 }; let _metricsSource = 'none'; // Primary: AgentDB (sql.js + HNSW) try { const bridge = await import('../memory/memory-bridge.js'); const agentResults = await bridge.bridgeListEntries({ namespace: 'agents', limit: 10000 }); const agentEntries = agentResults?.entries; if (agentEntries && agentEntries.length > 0) { let active = 0; for (const a of agentEntries) { try { const meta = a.metadata ? JSON.parse(a.metadata) : (a.value ? JSON.parse(a.value) : {}); if (meta.status === 'active' || meta.status === 'running') active++; } catch { /* skip unparseable */ } } agentCounts = { total: agentEntries.length, active }; _metricsSource = 'agentdb'; } const taskResults = await bridge.bridgeListEntries({ namespace: 'tasks', limit: 10000 }); const taskEntries = taskResults?.entries; if (taskEntries && taskEntries.length > 0) { let pending = 0, completed = 0, failed = 0; for (const t of taskEntries) { try { const meta = t.metadata ? JSON.parse(t.metadata) : (t.value ? JSON.parse(t.value) : {}); if (meta.status === 'pending' || meta.status === 'assigned') pending++; else if (meta.status === 'completed') completed++; else if (meta.status === 'failed') failed++; } catch { /* skip */ } } taskCounts = { pending, completed, failed }; _metricsSource = 'agentdb'; } } catch { /* AgentDB not available, try JSON fallback */ } // Fallback: JSON store files (backward compatibility) if (_metricsSource === 'none') { try { const agentStorePath = join(getProjectCwd(), STORAGE_DIR, 'agents', 'store.json'); if (existsSync(agentStorePath)) { const agentStore = JSON.parse(readFileSync(agentStorePath, 'utf-8')); const agents = Object.values(agentStore.agents || {}); agentCounts = { total: agents.length, active: agents.filter(a => a.status === 'active' || a.status === 'running').length, }; _metricsSource = 'json-store'; } } catch { /* agent store not available */ } try { const taskStorePath = join(getProjectCwd(), STORAGE_DIR, 'tasks', 'store.json'); if (existsSync(taskStorePath)) { const taskStore = JSON.parse(readFileSync(taskStorePath, 'utf-8')); const tasks = Object.values(taskStore.tasks || {}); taskCounts = { pending: tasks.filter(t => t.status === 'pending' || t.status === 'assigned').length, completed: tasks.filter(t => t.status === 'completed').length, failed: tasks.filter(t => t.status === 'failed').length, }; _metricsSource = 'json-store'; } } catch { /* task store not available */ } } const currentMetrics = { ...store, cpu: loadAvg[0] * 100 / cpus.length, // Real CPU load percentage memory: { used: Math.round((totalMem - freeMem) / 1024 / 1024), // Real MB used total: Math.round(totalMem / 1024 / 1024), // Real total MB }, agents: agentCounts, tasks: taskCounts, requests: await (async () => { try { const { getRequestCounts } = await import('./request-tracker.js'); const live = getRequestCounts(); if (live.total > 0) { return { total: live.total, success: live.success, errors: live.errors }; } } catch { /* tracker not available — fall back to stored value */ } return store.requests; })(), uptime: Math.floor(process.uptime() * 1000), // #2235(B) — live process uptime lastCheck: new Date().toISOString(), }; saveMetrics(currentMetrics); if (category === 'all') { return { ...currentMetrics, _real: true, _metricsSource, heap: { used: Math.round(memUsage.heapUsed / 1024 / 1024), total: Math.round(memUsage.heapTotal / 1024 / 1024), external: Math.round(memUsage.external / 1024 / 1024), }, loadAverage: loadAvg, cpuCores: cpus.length, }; } const categoryMap = { cpu: { usage: currentMetrics.cpu, cores: cpus.length, load: loadAvg, model: cpus[0]?.model, _real: true, }, memory: { ...currentMetrics.memory, heap: Math.round(memUsage.heapUsed / 1024 / 1024), heapTotal: Math.round(memUsage.heapTotal / 1024 / 1024), free: Math.round(freeMem / 1024 / 1024), _real: true, }, agents: currentMetrics.agents, tasks: currentMetrics.tasks, requests: currentMetrics.requests, }; return categoryMap[category] || currentMetrics; }, }, { name: 'system_health', description: 'Perform system health check Use when native Bash is wrong because you need Ruflo runtime metrics (HNSW index size, ReasoningBank state, swarm health, breaker status) — those are not in /proc, only in the running daemon. For OS-level info (uptime, disk, mem), native Bash + standard tools are fine.', category: 'system', inputSchema: { type: 'object', properties: { deep: { type: 'boolean', description: 'Perform deep health check' }, components: { type: 'array', items: { type: 'string' }, description: 'Components to check' }, fix: { type: 'boolean', description: 'Attempt to fix issues' }, }, }, handler: async (input) => { const metrics = loadMetrics(); const checks = []; const projectCwd = getProjectCwd(); // Memory DB check — verify any supported store file exists. // #1843: cover sql.js / HNSW / .swarm and root-level rvf/db paths, // not just the legacy `.claude-flow/memory/*` triple. { const t0 = performance.now(); const memoryCandidates = [ join(projectCwd, '.claude-flow', 'memory', 'store.json'), // legacy join(projectCwd, '.claude-flow', 'memory', 'agentdb.sqlite'), join(projectCwd, '.claude-flow', 'memory', 'store.rvf'), join(projectCwd, '.claude-flow', 'memory', 'claude-flow.db'), // sql.js join(projectCwd, '.swarm', 'memory.db'), // swarm join(projectCwd, 'ruvector.db'), // ruvector join(projectCwd, 'agentdb.rvf'), // root rvf ]; const memoryExists = memoryCandidates.some(existsSync); const elapsed = performance.now() - t0; checks.push({ name: 'memory', status: memoryExists ? 'healthy' : 'degraded', latency: Math.round(elapsed * 100) / 100, message: memoryExists ? undefined : 'Memory store not found — run memory init', }); } // Config check — verify config file exists. // #1843: also accept YAML config (.claude-flow/config.yaml) which // the rest of v3 treats as canonical; previous code only counted // .json variants and reported `degraded` when YAML was used. { const t0 = performance.now(); const configCandidates = [ join(projectCwd, '.claude-flow', 'config.json'), join(projectCwd, '.claude-flow', 'config.yaml'), join(projectCwd, '.claude-flow', 'config.yml'), join(projectCwd, 'claude-flow.config.json'), join(projectCwd, 'claude-flow.config.yaml'), join(projectCwd, 'claude-flow.config.yml'), ]; const configExists = configCandidates.some(existsSync); const elapsed = performance.now() - t0; checks.push({ name: 'config', status: configExists ? 'healthy' : 'degraded', latency: Math.round(elapsed * 100) / 100, message: configExists ? undefined : 'Config file not found — run init', }); } // MCP check — this process is the MCP server if stdin is piped { const isStdio = !process.stdin.isTTY; checks.push({ name: 'mcp', status: isStdio ? 'healthy' : 'unknown', message: isStdio ? 'MCP stdio server running (this process)' : 'Not running as MCP server', }); } // Swarm — cannot verify real connectivity, report unknown checks.push({ name: 'swarm', status: 'unknown', message: 'Swarm connectivity not monitored — check coordination store manually', }); // Neural — cannot verify, report unknown checks.push({ name: 'neural', status: 'unknown', message: 'Neural network health not monitored', }); if (input.deep) { // Disk check — real free space via statfsSync (Node 18.15+) { const t0 = performance.now(); try { const stats = statfsSync(projectCwd); const totalBytes = stats.blocks * stats.bsize; const freeBytes = stats.bfree * stats.bsize; const totalGB = Math.round((totalBytes / (1024 ** 3)) * 10) / 10; const freeGB = Math.round((freeBytes / (1024 ** 3)) * 10) / 10; const freePercent = Math.round((freeBytes / totalBytes) * 100); const elapsed = performance.now() - t0; checks.push({ name: 'disk', status: freePercent > 10 ? 'healthy' : 'warning', latency: Math.round(elapsed * 100) / 100, message: `${freeGB}GB free of ${totalGB}GB (${freePercent}%)`, }); } catch { const elapsed = performance.now() - t0; checks.push({ name: 'disk', status: 'unknown', latency: Math.round(elapsed * 100) / 100, message: 'Disk space check failed — statfsSync unavailable', }); } } // Network — DNS resolution check with timeout { const t0 = performance.now(); try { await Promise.race([ dns.promises.lookup('registry.npmjs.org'), new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 3000)), ]); const elapsed = performance.now() - t0; checks.push({ name: 'network', status: 'healthy', latency: Math.round(elapsed * 100) / 100, message: 'DNS resolution working', }); } catch { const elapsed = performance.now() - t0; checks.push({ name: 'network', status: 'warning', latency: Math.round(elapsed * 100) / 100, message: 'DNS resolution failed — check network', }); } } // Database — check if coordination store exists { const t0 = performance.now(); const coordPath = join(projectCwd, '.claude-flow', 'coordination', 'store.json'); const dbExists = existsSync(coordPath); const elapsed = performance.now() - t0; checks.push({ name: 'database', status: dbExists ? 'healthy' : 'unknown', latency: Math.round(elapsed * 100) / 100, message: dbExists ? undefined : 'Coordination store not found', }); } } // #1843: exclude `unknown` checks from the health score denominator. // Previously a check reporting `unknown` (e.g. swarm/neural which // can't be probed in-process) was counted as a non-healthy hit and // dragged the score below 100 even when every actionable check was // green. Treat `unknown` as advisory and surface it separately. const healthy = checks.filter(c => c.status === 'healthy').length; const advisory = checks.filter(c => c.status === 'unknown').length; const total = checks.length; const scoreDenominator = total - advisory; const overallHealth = scoreDenominator > 0 ? healthy / scoreDenominator : 1; // Update metrics metrics.health = overallHealth; saveMetrics(metrics); return { overall: overallHealth >= 0.8 ? 'healthy' : overallHealth >= 0.5 ? 'degraded' : 'unhealthy', score: Math.round(overallHealth * 100), checks, healthy, advisory, total, timestamp: new Date().toISOString(), issues: checks.filter(c => c.status !== 'healthy' && c.status !== 'unknown').map(c => ({ component: c.name, status: c.status, suggestion: `Check ${c.name} component configuration`, })), }; }, }, { name: 'system_info', description: 'Get system information Use when native Bash is wrong because you need Ruflo runtime metrics (HNSW index size, ReasoningBank state, swarm health, breaker status) — those are not in /proc, only in the running daemon. For OS-level info (uptime, disk, mem), native Bash + standard tools are fine.', category: 'system', inputSchema: { type: 'object', properties: { include: { type: 'array', items: { type: 'string' }, description: 'Information to include' }, }, }, handler: async () => { // #2215: flashAttention must reflect the runtime probe, not a stale literal. // Same source-of-truth as hooks_intelligence / neural_status so the tools // can never report contradictory state for the same daemon. let flashAttentionAvailable = false; try { const { getFlashAttention } = await import('@claude-flow/neural'); flashAttentionAvailable = getFlashAttention() !== null; } catch { flashAttentionAvailable = false; } return { version: PKG_VERSION, nodeVersion: process.version, platform: process.platform, arch: process.arch, pid: process.pid, cwd: getProjectCwd(), env: process.env.NODE_ENV || 'development', features: { swarm: true, memory: true, neural: true, hnsw: true, quantization: true, flashAttention: flashAttentionAvailable, }, limits: { maxAgents: 100, maxTasks: 1000, maxMemory: '4GB', }, }; }, }, { name: 'system_reset', description: 'Reset system state Use when native Bash is wrong because you need Ruflo runtime metrics (HNSW index size, ReasoningBank state, swarm health, breaker status) — those are not in /proc, only in the running daemon. For OS-level info (uptime, disk, mem), native Bash + standard tools are fine.', category: 'system', inputSchema: { type: 'object', properties: { component: { type: 'string', description: 'Component to reset (all, metrics, agents, tasks)' }, confirm: { type: 'boolean', description: 'Confirm reset' }, }, required: ['confirm'], }, handler: async (input) => { if (!input.confirm) { return { success: false, error: 'Reset requires confirmation' }; } if (input.component) { const v = validateIdentifier(input.component, 'component'); if (!v.valid) return { success: false, error: v.error }; } const component = input.component || 'metrics'; // Reset metrics to defaults const defaultMetrics = { startTime: new Date().toISOString(), lastCheck: new Date().toISOString(), uptime: 0, health: 1.0, cpu: os.loadavg()[0] * 100 / os.cpus().length, memory: { used: Math.round((os.totalmem() - os.freemem()) / 1024 / 1024), total: Math.round(os.totalmem() / 1024 / 1024) }, agents: { active: 0, total: 0 }, tasks: { pending: 0, completed: 0, failed: 0 }, requests: { total: 0, success: 0, errors: 0 }, }; saveMetrics(defaultMetrics); return { success: true, component, resetAt: new Date().toISOString(), message: `System ${component} has been reset`, }; }, }, { name: 'mcp_status', description: 'Get MCP server status, including stdio mode detection Use when native Claude Code MCP status is wrong because you need Ruflo-side server detail — tool counts per namespace, transport stats, MCP handshake errors. For just "is MCP up?", `claude mcp list` is fine.', category: 'system', inputSchema: { type: 'object', properties: {}, }, handler: async () => { // Detect if we are running inside an MCP stdio session. // When Claude Code launches us via `claude mcp add`, stdin is piped (not a TTY) // and the process IS the MCP server, so it is running. const isStdio = !process.stdin.isTTY; const transport = process.env.CLAUDE_FLOW_MCP_TRANSPORT || (isStdio ? 'stdio' : 'http'); const port = parseInt(process.env.CLAUDE_FLOW_MCP_PORT || '3000', 10); if (transport === 'stdio' || isStdio) { // In stdio mode the MCP server is this process itself return { running: true, pid: process.pid, transport: 'stdio', port: null, host: null, }; } // For HTTP/WebSocket, try to check if the server is listening const host = process.env.CLAUDE_FLOW_MCP_HOST || 'localhost'; try { const { createConnection } = await import('node:net'); const connected = await new Promise((resolve) => { const socket = createConnection({ host, port }, () => { socket.destroy(); resolve(true); }); socket.on('error', () => resolve(false)); socket.setTimeout(2000, () => { socket.destroy(); resolve(false); }); }); return { running: connected, transport, port, host, }; } catch { return { running: false, transport, port, host, }; } }, }, { name: 'task_summary', description: 'Get a summary of all tasks by status Use when native TodoWrite is wrong because you need cross-session task persistence, agent assignment, dependency tracking, or completion analytics in the .swarm/memory.db. For in-session checklists native TodoWrite is simpler and faster.', category: 'task', inputSchema: { type: 'object', properties: {}, }, handler: async () => { // Read from the task store file const storePath = join(getProjectCwd(), '.claude-flow', 'tasks', 'store.json'); let tasks = []; try { if (existsSync(storePath)) { const data = readFileSync(storePath, 'utf-8'); const store = JSON.parse(data); tasks = Object.values(store.tasks || {}); } } catch { // empty store } return { total: tasks.length, pending: tasks.filter(t => t.status === 'pending').length, running: tasks.filter(t => t.status === 'in_progress').length, completed: tasks.filter(t => t.status === 'completed').length, failed: tasks.filter(t => t.status === 'failed').length, }; }, }, { // #1916: `ruflo start` referenced an unregistered `mcp_start` tool. MCP // tools run *in-process* via the CLI's TOOL_REGISTRY — there is no // separate server process to spawn from inside an MCP call. If this tool // responds, MCP is already up. (`ruflo mcp start` runs a standalone // stdio/HTTP server; that's a process command, not an MCP tool.) name: 'mcp_start', description: 'Report that the in-process MCP toolset is available (no-op "start" — if this tool responds, MCP is up). Use when native `claude mcp list` is wrong because you want Ruflo-side confirmation that the in-process registry loaded. For a standalone stdio/HTTP MCP server, run `ruflo mcp start` (a process command, not this tool). Pair with mcp_status for detail.', category: 'system', inputSchema: { type: 'object', properties: { port: { type: 'number', description: 'Port (advisory — in-process MCP has no port)' }, transport: { type: 'string', description: 'Transport (advisory)' }, tools: { type: 'array', items: { type: 'string' }, description: 'Tool namespaces (advisory — all are loaded)' }, }, }, handler: async (input) => { const isStdio = !process.stdin.isTTY; return { serverId: `in-process-${process.pid}`, port: typeof input.port === 'number' ? input.port : (parseInt(process.env.CLAUDE_FLOW_MCP_PORT || '0', 10) || null), transport: input.transport || process.env.CLAUDE_FLOW_MCP_TRANSPORT || (isStdio ? 'stdio' : 'in-process'), startedAt: new Date().toISOString(), note: 'MCP tools run in-process via the CLI; no separate server process was started. Use `ruflo mcp start` for a standalone server.', }; }, }, { // #1916: `ruflo stop` referenced an unregistered `mcp_stop` tool. Same // story as mcp_start — nothing to stop for the in-process registry. name: 'mcp_stop', description: 'No-op "stop" for the in-process MCP toolset (there is no separate server process to stop from inside an MCP call). Use when native process-kill is wrong because you mistakenly think Ruflo runs a daemon — it does not, the tools live in the CLI process. To stop a standalone server run `ruflo mcp stop` or terminate that process. Pair with mcp_status.', category: 'system', inputSchema: { type: 'object', properties: { graceful: { type: 'boolean', description: 'Advisory (no-op)' }, timeout: { type: 'number', description: 'Advisory (no-op)' }, }, }, handler: async () => { return { stopped: false, note: 'no separate MCP server process; nothing to stop. The in-process toolset goes away when the CLI process exits. Use `ruflo mcp stop` for a standalone server.', }; }, }, ]; //# sourceMappingURL=system-tools.js.map