UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

390 lines 16.2 kB
/**
 * Swarm MCP Tools for CLI
 *
 * Tool definitions for swarm coordination with file-based state persistence.
 * Replaces previous stub implementations with real state tracking.
 */
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
import { join } from 'node:path';
import { getProjectCwd } from './types.js';
import { validateIdentifier } from './validate-input.js';

// Swarm state persistence
const SWARM_DIR = '.claude-flow/swarm';
const SWARM_STATE_FILE = 'swarm-state.json';

// Bounds and default applied to the `maxAgents` input of `swarm_init`.
const MIN_AGENTS = 1;
const MAX_AGENTS = 50;
const DEFAULT_MAX_AGENTS = 15;

/**
 * @returns {string} Path of the swarm state directory under the project cwd.
 */
function getSwarmDir() {
  return join(getProjectCwd(), SWARM_DIR);
}

/**
 * @returns {string} Path of the JSON file that holds the swarm store.
 */
function getSwarmStatePath() {
  return join(getSwarmDir(), SWARM_STATE_FILE);
}

/**
 * Create the swarm state directory (and any missing parents) when it does
 * not exist yet. The directory is created with mode 0o700.
 */
function ensureSwarmDir() {
  const dir = getSwarmDir();
  if (!existsSync(dir)) {
    mkdirSync(dir, { recursive: true, mode: 0o700 });
  }
}

/**
 * True for non-null, non-array objects. Used to sanity-check parsed JSON
 * before it is treated as the swarm store.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isPlainObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Look up a swarm by ID using an own-property check, so IDs that collide
 * with `Object.prototype` members (e.g. "constructor") are reported as
 * missing instead of resolving to an inherited value.
 *
 * @param {{ swarms: object }} store
 * @param {string} swarmId
 * @returns {object | undefined} The stored swarm, or undefined when absent.
 */
function getOwnSwarm(store, swarmId) {
  return Object.prototype.hasOwnProperty.call(store.swarms, swarmId)
    ? store.swarms[swarmId]
    : undefined;
}

/**
 * Pick the swarm with the newest `updatedAt` timestamp.
 *
 * @param {object[]} swarms
 * @returns {object | undefined} The most recently updated swarm, or
 *   undefined when `swarms` is empty.
 */
function mostRecentSwarm(swarms) {
  // Sort a copy so the caller's array is never reordered.
  return [...swarms].sort(
    (a, b) => new Date(b.updatedAt).getTime() - new Date(a.updatedAt).getTime(),
  )[0];
}

/**
 * Build the status payload returned by `swarm_status` for one swarm.
 *
 * @param {object} swarm
 * @returns {object}
 */
function summarizeSwarm(swarm) {
  return {
    swarmId: swarm.swarmId,
    status: swarm.status,
    topology: swarm.topology,
    maxAgents: swarm.maxAgents,
    agentCount: swarm.agents.length,
    taskCount: swarm.tasks.length,
    config: swarm.config,
    createdAt: swarm.createdAt,
    updatedAt: swarm.updatedAt,
  };
}

/**
 * #1799 — return true when `pid` belongs to a live process. process.kill(pid, 0)
 * with signal 0 is the documented liveness probe: ESRCH ⇒ dead, EPERM ⇒ alive
 * but owned by another user (still alive — don't reap), success ⇒ alive.
 *
 * @param {number} pid
 * @returns {boolean}
 */
function isPidAlive(pid) {
  try {
    process.kill(pid, 0);
    return true;
  } catch (e) {
    return e.code === 'EPERM';
  }
}

/**
 * #1799 — Walk swarms with status='running' and mark orphans as 'terminated':
 *
 * - PID-based: if `pid` is set and the process is dead, the swarm is an
 *   orphan (host crashed / shell exited / daemon backgrounded poorly).
 * - TTL fallback: pre-#1799 entries have no `pid`; reap them when their
 *   `updatedAt` is older than 24h. This is conservative — long-idle but
 *   legitimately running swarms can recover by writing a heartbeat.
 *
 * Mutates `store` in place; returns the count for the caller to decide
 * whether to persist.
 *
 * @param {{ swarms: object }} store
 * @returns {number} Number of swarms that were marked as terminated.
 */
const ORPHAN_TTL_MS = 24 * 60 * 60 * 1000;
function reconcileOrphanSwarms(store) {
  let reconciled = 0;
  const nowIso = new Date().toISOString();
  const nowMs = Date.now();
  for (const swarm of Object.values(store.swarms)) {
    // Skip null/garbage entries from a hand-edited or damaged state file.
    if (!swarm || swarm.status !== 'running') continue;
    let orphanReason = null;
    if (typeof swarm.pid === 'number') {
      if (!isPidAlive(swarm.pid)) {
        orphanReason = `host process ${swarm.pid} exited`;
      }
    } else {
      const ageMs = nowMs - new Date(swarm.updatedAt).getTime();
      // A missing/unparseable `updatedAt` yields NaN and is left alone.
      if (Number.isFinite(ageMs) && ageMs > ORPHAN_TTL_MS) {
        orphanReason = `no pid recorded and heartbeat is ${Math.round(ageMs / 3600000)}h stale`;
      }
    }
    if (orphanReason) {
      swarm.status = 'terminated';
      swarm.terminationReason = orphanReason;
      swarm.updatedAt = nowIso;
      reconciled++;
    }
  }
  return reconciled;
}

// #2085 — exported so `agent-tools.ts agent_spawn` can push into
// `swarm.agents` (the field `swarm_status` reads).
/**
 * Load the swarm store from disk, falling back to an empty store when the
 * state file is missing, unreadable, or not a JSON object with a `swarms` map.
 *
 * @returns {{ swarms: object, version?: string }} The store; `swarms` is
 *   always an object.
 */
export function loadSwarmStore() {
  let store = { swarms: {}, version: '3.0.0' };
  try {
    const path = getSwarmStatePath();
    if (existsSync(path)) {
      const parsed = JSON.parse(readFileSync(path, 'utf-8'));
      // Valid JSON is not necessarily a valid store (`null`, `[]`, `{}`):
      // only adopt objects, and guarantee `swarms` is a map so callers and
      // the reconcile pass below can iterate it safely.
      if (isPlainObject(parsed)) {
        store = parsed;
        if (!isPlainObject(store.swarms)) {
          store.swarms = {};
        }
      }
    }
  } catch {
    /* fall through with default */
  }
  // #1799 — reconcile orphans on every load and persist if anything changed.
  // Cheap (process.kill(pid, 0) is sub-millisecond) and means
  // `swarm_status`/`swarm_health` never see ghost "running" entries.
  const reconciled = reconcileOrphanSwarms(store);
  if (reconciled > 0) {
    try {
      saveSwarmStore(store);
    } catch {
      /* best-effort */
    }
  }
  return store;
}

/**
 * Persist the swarm store as pretty-printed JSON, creating the state
 * directory first when needed.
 *
 * @param {object} store
 */
export function saveSwarmStore(store) {
  ensureSwarmDir();
  writeFileSync(getSwarmStatePath(), JSON.stringify(store, null, 2), 'utf-8');
}

// Input validation
const VALID_TOPOLOGIES = new Set([
  'hierarchical',
  'mesh',
  'hierarchical-mesh',
  'ring',
  'star',
  'hybrid',
  'adaptive',
]);

export const swarmTools = [
  {
    name: 'swarm_init',
    description: 'Initialize a swarm with persistent state tracking Use when native Task tool is wrong because you need multi-agent coordination — topology (hierarchical/mesh/star), consensus (raft/byzantine/gossip/crdt/quorum), shared memory namespace, or anti-drift gates. For independent one-shot subagents, native Task is fine; spawn each separately.',
    category: 'swarm',
    inputSchema: {
      type: 'object',
      properties: {
        topology: { type: 'string', description: 'Swarm topology type (hierarchical, mesh, hierarchical-mesh, ring, star, hybrid, adaptive)' },
        maxAgents: { type: 'number', description: 'Maximum number of agents (1-50)' },
        strategy: { type: 'string', description: 'Agent strategy (specialized, balanced, adaptive)' },
        config: { type: 'object', description: 'Additional swarm configuration' },
      },
    },
    handler: async (input) => {
      // Validate user-provided input (#1425)
      if (input.topology) {
        const v = validateIdentifier(input.topology, 'topology');
        if (!v.valid) return { success: false, error: v.error };
      }
      if (input.strategy) {
        const v = validateIdentifier(input.strategy, 'strategy');
        if (!v.valid) return { success: false, error: v.error };
      }
      const topology = input.topology || 'hierarchical-mesh';
      // Missing, zero, or non-numeric values fall back to the default so a
      // NaN never reaches the persisted state; the result is then clamped.
      const requestedMax = Number(input.maxAgents) || DEFAULT_MAX_AGENTS;
      const maxAgents = Math.min(Math.max(requestedMax, MIN_AGENTS), MAX_AGENTS);
      const strategy = input.strategy || 'specialized';
      const config = (input.config || {});
      if (!VALID_TOPOLOGIES.has(topology)) {
        return {
          success: false,
          error: `Invalid topology: ${topology}. Valid: ${[...VALID_TOPOLOGIES].join(', ')}`,
        };
      }
      const swarmId = `swarm-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
      const now = new Date().toISOString();
      const swarmState = {
        swarmId,
        topology,
        maxAgents,
        status: 'running',
        agents: [],
        tasks: [],
        config: {
          topology,
          maxAgents,
          strategy,
          communicationProtocol: config.communicationProtocol || 'message-bus',
          autoScaling: config.autoScaling ?? true,
          consensusMechanism: config.consensusMechanism || 'majority',
        },
        createdAt: now,
        updatedAt: now,
        // #1799 — record host PID so subsequent loads can detect orphans
        // when this process exits without a graceful swarm_shutdown.
        pid: process.pid,
      };
      const store = loadSwarmStore();
      store.swarms[swarmId] = swarmState;
      saveSwarmStore(store);
      return {
        success: true,
        swarmId,
        topology,
        strategy,
        maxAgents,
        initializedAt: now,
        config: swarmState.config,
        persisted: true,
      };
    },
  },
  {
    name: 'swarm_status',
    description: 'Get swarm status from persistent state Use when native Task tool is wrong because you need multi-agent coordination — topology (hierarchical/mesh/star), consensus (raft/byzantine/gossip/crdt/quorum), shared memory namespace, or anti-drift gates. For independent one-shot subagents, native Task is fine; spawn each separately.',
    category: 'swarm',
    inputSchema: {
      type: 'object',
      properties: {
        swarmId: { type: 'string', description: 'Swarm ID (omit for most recent)' },
      },
    },
    handler: async (input) => {
      // Validate user-provided input (#1425)
      if (input.swarmId) {
        const v = validateIdentifier(input.swarmId, 'swarmId');
        if (!v.valid) return { success: false, error: v.error };
      }
      const store = loadSwarmStore();
      const swarmId = input.swarmId;
      const swarmIds = Object.keys(store.swarms);
      if (swarmId) {
        const swarm = getOwnSwarm(store, swarmId);
        if (!swarm) {
          // An explicit ID that does not exist must not silently report a
          // different swarm's status (mirrors swarm_health's not_found).
          return {
            status: 'not_found',
            message: `Swarm ${swarmId} not found`,
            totalSwarms: swarmIds.length,
          };
        }
        return summarizeSwarm(swarm);
      }
      // Return most recent swarm if no ID specified
      if (swarmIds.length === 0) {
        return {
          status: 'no_swarm',
          message: 'No active swarms. Use swarm_init to create one.',
          totalSwarms: 0,
        };
      }
      const latest = mostRecentSwarm(swarmIds.map((id) => store.swarms[id]));
      return {
        ...summarizeSwarm(latest),
        totalSwarms: swarmIds.length,
      };
    },
  },
  {
    name: 'swarm_shutdown',
    description: 'Shutdown a swarm and update persistent state Use when native Task tool is wrong because you need multi-agent coordination — topology (hierarchical/mesh/star), consensus (raft/byzantine/gossip/crdt/quorum), shared memory namespace, or anti-drift gates. For independent one-shot subagents, native Task is fine; spawn each separately.',
    category: 'swarm',
    inputSchema: {
      type: 'object',
      properties: {
        swarmId: { type: 'string', description: 'Swarm ID to shutdown' },
        graceful: { type: 'boolean', description: 'Graceful shutdown (default: true)' },
      },
    },
    handler: async (input) => {
      // Validate user-provided input (#1425)
      if (input.swarmId) {
        const v = validateIdentifier(input.swarmId, 'swarmId');
        if (!v.valid) return { success: false, error: v.error };
      }
      const store = loadSwarmStore();
      const swarmId = input.swarmId;
      // Find the swarm. An explicit ID is looked up strictly: a typo must
      // yield "not found", never the termination of some other swarm.
      let target;
      if (swarmId) {
        target = getOwnSwarm(store, swarmId);
      } else {
        // Shutdown most recent running swarm
        target = mostRecentSwarm(
          Object.values(store.swarms).filter((s) => s.status === 'running'),
        );
      }
      if (!target) {
        return {
          success: false,
          error: swarmId ? `Swarm ${swarmId} not found` : 'No running swarms to shutdown',
        };
      }
      if (target.status === 'terminated') {
        return {
          success: false,
          swarmId: target.swarmId,
          error: 'Swarm already terminated',
        };
      }
      target.status = 'terminated';
      target.updatedAt = new Date().toISOString();
      saveSwarmStore(store);
      return {
        success: true,
        swarmId: target.swarmId,
        terminated: true,
        graceful: input.graceful ?? true,
        agentsTerminated: target.agents.length,
        terminatedAt: target.updatedAt,
      };
    },
  },
  {
    name: 'swarm_health',
    description: 'Check swarm health status with real state inspection Use when native Task tool is wrong because you need multi-agent coordination — topology (hierarchical/mesh/star), consensus (raft/byzantine/gossip/crdt/quorum), shared memory namespace, or anti-drift gates. For independent one-shot subagents, native Task is fine; spawn each separately.',
    category: 'swarm',
    inputSchema: {
      type: 'object',
      properties: {
        swarmId: { type: 'string', description: 'Swarm ID to check' },
      },
    },
    handler: async (input) => {
      // Validate user-provided input (#1425)
      if (input.swarmId) {
        const v = validateIdentifier(input.swarmId, 'swarmId');
        if (!v.valid) return { success: false, error: v.error };
      }
      const store = loadSwarmStore();
      const swarmId = input.swarmId;
      // Find the swarm
      let target;
      if (swarmId) {
        target = getOwnSwarm(store, swarmId);
        if (!target) {
          return {
            status: 'not_found',
            healthy: false,
            checks: [
              { name: 'swarm_exists', status: 'fail', message: `Swarm ${swarmId} not found` },
            ],
            checkedAt: new Date().toISOString(),
          };
        }
      } else {
        target = mostRecentSwarm(
          Object.values(store.swarms).filter((s) => s.status === 'running'),
        );
      }
      if (!target) {
        return {
          status: 'no_swarm',
          healthy: false,
          checks: [
            { name: 'swarm_exists', status: 'fail', message: 'No active swarm found' },
          ],
          checkedAt: new Date().toISOString(),
        };
      }
      const isRunning = target.status === 'running';
      const stateFileExists = existsSync(getSwarmStatePath());
      const checks = [
        {
          name: 'coordinator',
          status: isRunning ? 'ok' : 'warn',
          message: isRunning ? 'Coordinator active' : `Swarm status: ${target.status}`,
        },
        {
          name: 'agents',
          status: target.agents.length > 0 ? 'ok' : 'info',
          message: `${target.agents.length} agents registered (max: ${target.maxAgents})`,
        },
        {
          name: 'persistence',
          status: stateFileExists ? 'ok' : 'warn',
          message: stateFileExists ? 'State file persisted' : 'State file missing',
        },
        {
          name: 'topology',
          status: 'ok',
          message: `Topology: ${target.topology}`,
        },
      ];
      // Healthy means the swarm is running and its state file is on disk;
      // the agents and topology checks are informational only.
      const healthy = isRunning && stateFileExists;
      return {
        status: healthy ? 'healthy' : 'degraded',
        healthy,
        swarmId: target.swarmId,
        topology: target.topology,
        agentCount: target.agents.length,
        checks,
        checkedAt: new Date().toISOString(),
      };
    },
  },
];
//# sourceMappingURL=swarm-tools.js.map