UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

355 lines 19.6 kB
/**
 * RuVector LLM WASM MCP Tools
 *
 * Exposes @ruvector/ruvllm-wasm operations via MCP protocol.
 * All tools gracefully degrade when the WASM package is not installed.
 */
import { validateIdentifier, validateText } from './validate-input.js';

// ── Instance Registries ──────────────────────────────────────
// Live objects created by the `*_create` tools, keyed by the ID handed back
// to the caller. Declared ahead of the tool table so every handler reads a
// binding that is already initialised.
const hnswRouters = new Map();
const sonaInstances = new Map();
const loraInstances = new Map();

// Monotonic per-process counter appended to generated IDs.
let instanceSeq = 0;

// Template names passed straight through to `formatChat`; any other value is
// wrapped as `{ modelId }` so the WASM side can auto-detect the template.
const CHAT_TEMPLATE_PRESETS = ['llama3', 'mistral', 'chatml', 'phi', 'gemma'];

// Usage guidance shared by every tool description.
const LOCAL_INFERENCE_NOTE = 'Use when sending every prompt to the Anthropic API is wrong because you need local inference — air-gapped environments, MicroLoRA-fine-tuned per-task adapters, or sub-cent per-call cost. \nFor general Claude work native Task is the right call.';

/**
 * Build a tool description: the tool-specific summary followed by the shared
 * usage note.
 * @param {string} summary - What this particular tool does.
 * @returns {string} Full description text.
 */
function describeTool(summary) {
    return `${summary} ${LOCAL_INFERENCE_NOTE}`;
}

/**
 * Generate a registry key for a newly created instance.
 *
 * A timestamp alone collides when two instances are created within the same
 * millisecond, and the later `Map.set` would silently replace the earlier
 * instance. The sequence suffix keeps IDs unique within this process while
 * using only characters the timestamp-based IDs already contained
 * (base-36 alphanumerics and `-`).
 * @param {string} prefix - Instance family, e.g. `hnsw`, `sona`, `lora`.
 * @returns {string} ID of the form `<prefix>-<time36>-<seq36>`.
 */
function nextInstanceId(prefix) {
    instanceSeq += 1;
    return `${prefix}-${Date.now().toString(36)}-${instanceSeq.toString(36)}`;
}

/**
 * Wrap a string as a successful tool result.
 * @param {string} text - Text payload.
 * @returns {{content: Array<{type: string, text: string}>}}
 */
function textResult(text) {
    return { content: [{ type: 'text', text }] };
}

/**
 * Wrap a value as a successful tool result containing its JSON encoding.
 * @param {unknown} payload - Value to serialise.
 * @param {number} [indent] - Optional `JSON.stringify` indentation.
 * @returns {{content: Array<{type: string, text: string}>}}
 */
function jsonResult(payload, indent) {
    return textResult(JSON.stringify(payload, null, indent));
}

/**
 * Wrap an error message as a failed tool result (`isError: true`).
 * @param {unknown} message - Stored under the `error` key of the JSON text.
 * @returns {{content: Array<{type: string, text: string}>, isError: boolean}}
 */
function errorResult(message) {
    return { content: [{ type: 'text', text: JSON.stringify({ error: message }) }], isError: true };
}

/**
 * Turn a validator outcome into an early-return value.
 * @param {{valid: boolean, error?: string}} check - Result of a `validate*` call.
 * @returns {object|null} A failed tool result when invalid, otherwise `null`.
 */
function rejectInvalid(check) {
    return check.valid ? null : errorResult(check.error);
}

/**
 * Convert a caller-supplied vector to a `Float32Array`.
 *
 * `new Float32Array(x)` treats a number as a length (yielding zeros) and most
 * other non-array values as an empty vector, so anything that is not an array
 * or typed array is rejected instead of being coerced.
 * @param {unknown} value - Candidate vector.
 * @param {string} field - Argument name used in the error message.
 * @returns {Float32Array}
 * @throws {TypeError} When `value` is neither an array nor a typed array.
 */
function toFloat32Vector(value, field) {
    const isTypedArray = ArrayBuffer.isView(value) && !(value instanceof DataView);
    if (!Array.isArray(value) && !isTypedArray) {
        throw new TypeError(`${field} must be an array of numbers`);
    }
    return new Float32Array(value);
}

// #2086 — every ruvllm_* MCP handler that touches the WASM runtime calls
// this. The downstream `createSonaInstant`/`createMicroLora`/`createHnswRouter`
// helpers all need `initSync({ module: wasmBytes })` to have run, otherwise
// the WASM exports throw. Doing it here makes the bootstrap invisible to
// MCP callers — they don't need a separate `ruvllm_init` tool. `_wasmReady`
// inside `initRuvllmWasm` short-circuits on the second+ call, so the cost
// after the first invocation is one boolean check.
//
// `ruvllm_status` deliberately uses `loadRuvllmWasmModule()` (no init) so a
// caller diagnosing why nothing works gets `initialized=false` instead of
// an error from a failed init.
async function loadRuvllmWasm() {
    const mod = await loadRuvllmWasmModule();
    await mod.initRuvllmWasm();
    return mod;
}

// Lazily import the WASM wrapper module without initialising it.
async function loadRuvllmWasmModule() {
    return import('../ruvector/ruvllm-wasm.js');
}

/**
 * MCP tool definitions for the ruvllm WASM backend.
 *
 * Each entry has a `name`, a `description`, a JSON-schema `inputSchema` and an
 * async `handler`. Handlers report failure by returning a result with
 * `isError: true` whose text is `{"error": ...}`. Handlers default a missing
 * `args` object to `{}` so a call without arguments follows the same path.
 */
export const ruvllmWasmTools = [
    {
        name: 'ruvllm_status',
        description: describeTool('Get ruvllm-wasm availability and initialization status.'),
        inputSchema: { type: 'object', properties: {} },
        handler: async () => {
            try {
                const mod = await loadRuvllmWasmModule();
                const wasmStatus = await mod.getRuvllmStatus();
                // Also include native ruvllm CJS backend status (ADR-086)
                let nativeBackend = { available: false };
                try {
                    const { getIntelligenceStats } = await import('../memory/intelligence.js');
                    const iStats = getIntelligenceStats();
                    const { getSONAStats } = await import('../memory/sona-optimizer.js');
                    const sStats = await getSONAStats();
                    nativeBackend = {
                        available: iStats._ruvllmBackend === 'active',
                        coordinator: iStats._ruvllmBackend || 'unavailable',
                        trajectories: iStats._ruvllmTrajectories || 0,
                        contrastiveTrainer: sStats._contrastiveTrainer !== 'unavailable' ? 'active' : 'unavailable',
                        trainingBackend: iStats._trainingBackend || 'unknown',
                    };
                }
                catch { /* not initialized yet — report `available: false` */ }
                // Graph database status (ADR-087)
                let graphStatus = { available: false };
                try {
                    const { getGraphStats } = await import('../ruvector/graph-backend.js');
                    const gs = await getGraphStats();
                    graphStatus = { available: gs.backend === 'graph-node', ...gs };
                }
                catch { /* not loaded — report `available: false` */ }
                return jsonResult({ wasm: wasmStatus, native: nativeBackend, graph: graphStatus }, 2);
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
    {
        name: 'ruvllm_hnsw_create',
        description: describeTool('Create a WASM HNSW router for semantic pattern routing. Max ~11 patterns (v2.0.1 limit).'),
        inputSchema: {
            type: 'object',
            properties: {
                dimensions: { type: 'number', description: 'Embedding dimensions (e.g., 64, 128, 384)' },
                maxPatterns: { type: 'number', description: 'Max patterns capacity (limit ~11 in v2.0.1)' },
                efSearch: { type: 'number', description: 'HNSW ef search parameter (higher = more accurate, slower)' },
            },
            required: ['dimensions', 'maxPatterns'],
        },
        handler: async (args = {}) => {
            try {
                const mod = await loadRuvllmWasm();
                const router = await mod.createHnswRouter({
                    dimensions: args.dimensions,
                    maxPatterns: args.maxPatterns,
                    efSearch: args.efSearch,
                });
                // Store router in module-level registry
                const id = nextInstanceId('hnsw');
                hnswRouters.set(id, router);
                return jsonResult({ success: true, routerId: id, dimensions: args.dimensions, maxPatterns: args.maxPatterns });
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
    {
        name: 'ruvllm_hnsw_add',
        description: describeTool('Add a pattern to an HNSW router. Embedding must match router dimensions.'),
        inputSchema: {
            type: 'object',
            properties: {
                routerId: { type: 'string', description: 'HNSW router ID from ruvllm_hnsw_create' },
                name: { type: 'string', description: 'Pattern name/label' },
                embedding: { type: 'array', items: { type: 'number' }, description: 'Float array embedding vector' },
                metadata: { type: 'object', description: 'Optional metadata object' },
            },
            required: ['routerId', 'name', 'embedding'],
        },
        handler: async (args = {}) => {
            const badRouterId = rejectInvalid(validateIdentifier(args.routerId, 'routerId'));
            if (badRouterId)
                return badRouterId;
            const badName = rejectInvalid(validateIdentifier(args.name, 'name'));
            if (badName)
                return badName;
            try {
                const router = hnswRouters.get(args.routerId);
                if (!router)
                    return errorResult(`Router not found: ${args.routerId}`);
                const embedding = toFloat32Vector(args.embedding, 'embedding');
                const ok = router.addPattern({
                    name: args.name,
                    embedding,
                    metadata: args.metadata,
                });
                return jsonResult({ success: ok, patternCount: router.patternCount() });
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
    {
        name: 'ruvllm_hnsw_route',
        description: describeTool('Route a query embedding to nearest patterns in HNSW index.'),
        inputSchema: {
            type: 'object',
            properties: {
                routerId: { type: 'string', description: 'HNSW router ID' },
                query: { type: 'array', items: { type: 'number' }, description: 'Query embedding vector' },
                k: { type: 'number', description: 'Number of nearest neighbors (default: 3)' },
            },
            required: ['routerId', 'query'],
        },
        handler: async (args = {}) => {
            const badRouterId = rejectInvalid(validateIdentifier(args.routerId, 'routerId'));
            if (badRouterId)
                return badRouterId;
            try {
                const router = hnswRouters.get(args.routerId);
                if (!router)
                    return errorResult(`Router not found: ${args.routerId}`);
                const query = toFloat32Vector(args.query, 'query');
                const results = router.route(query, args.k ?? 3);
                return jsonResult({ results });
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
    {
        name: 'ruvllm_sona_create',
        description: describeTool('Create a SONA instant adaptation loop (<1ms adaptation cycles).'),
        inputSchema: {
            type: 'object',
            properties: {
                hiddenDim: { type: 'number', description: 'Hidden dimension (default: 64)' },
                learningRate: { type: 'number', description: 'Learning rate (default: 0.01)' },
                patternCapacity: { type: 'number', description: 'Max stored patterns' },
            },
        },
        handler: async (args = {}) => {
            try {
                const mod = await loadRuvllmWasm();
                const sona = await mod.createSonaInstant({
                    hiddenDim: args.hiddenDim,
                    learningRate: args.learningRate,
                    patternCapacity: args.patternCapacity,
                });
                const id = nextInstanceId('sona');
                sonaInstances.set(id, sona);
                return jsonResult({ success: true, sonaId: id });
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
    {
        name: 'ruvllm_sona_adapt',
        description: describeTool('Run SONA instant adaptation with a quality signal.'),
        inputSchema: {
            type: 'object',
            properties: {
                sonaId: { type: 'string', description: 'SONA instance ID' },
                quality: { type: 'number', description: 'Quality signal (0.0-1.0)' },
            },
            required: ['sonaId', 'quality'],
        },
        handler: async (args = {}) => {
            const badSonaId = rejectInvalid(validateIdentifier(args.sonaId, 'sonaId'));
            if (badSonaId)
                return badSonaId;
            try {
                const sona = sonaInstances.get(args.sonaId);
                if (!sona)
                    return errorResult(`SONA not found: ${args.sonaId}`);
                sona.adapt(args.quality);
                return jsonResult({ success: true, stats: sona.stats() });
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
    {
        name: 'ruvllm_microlora_create',
        description: describeTool('Create a MicroLoRA adapter (ultra-lightweight LoRA, ranks 1-4).'),
        inputSchema: {
            type: 'object',
            properties: {
                inputDim: { type: 'number', description: 'Input dimension' },
                outputDim: { type: 'number', description: 'Output dimension' },
                rank: { type: 'number', description: 'LoRA rank (1-4, default: 2)' },
                alpha: { type: 'number', description: 'LoRA alpha scaling (default: 1.0)' },
            },
            required: ['inputDim', 'outputDim'],
        },
        handler: async (args = {}) => {
            try {
                const mod = await loadRuvllmWasm();
                const lora = await mod.createMicroLora({
                    inputDim: args.inputDim,
                    outputDim: args.outputDim,
                    rank: args.rank,
                    alpha: args.alpha,
                });
                const id = nextInstanceId('lora');
                loraInstances.set(id, lora);
                return jsonResult({ success: true, loraId: id });
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
    {
        name: 'ruvllm_microlora_adapt',
        description: describeTool('Adapt MicroLoRA weights with quality feedback.'),
        inputSchema: {
            type: 'object',
            properties: {
                loraId: { type: 'string', description: 'MicroLoRA instance ID' },
                quality: { type: 'number', description: 'Quality signal (0.0-1.0)' },
                learningRate: { type: 'number', description: 'Learning rate (default: 0.01)' },
                success: { type: 'boolean', description: 'Whether the adaptation was successful (default: true)' },
            },
            required: ['loraId', 'quality'],
        },
        handler: async (args = {}) => {
            const badLoraId = rejectInvalid(validateIdentifier(args.loraId, 'loraId'));
            if (badLoraId)
                return badLoraId;
            try {
                const lora = loraInstances.get(args.loraId);
                if (!lora)
                    return errorResult(`MicroLoRA not found: ${args.loraId}`);
                lora.adapt(args.quality, args.learningRate, args.success);
                return jsonResult({ success: true, stats: lora.stats() });
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
    {
        name: 'ruvllm_chat_format',
        description: describeTool('Format chat messages using a template (llama3, mistral, chatml, phi, gemma, or auto-detect).'),
        inputSchema: {
            type: 'object',
            properties: {
                messages: {
                    type: 'array',
                    items: { type: 'object', properties: { role: { type: 'string' }, content: { type: 'string' } }, required: ['role', 'content'] },
                    description: 'Array of {role, content} message objects',
                },
                template: { type: 'string', description: 'Template preset (llama3, mistral, chatml, phi, gemma) or model ID for auto-detection' },
            },
            required: ['messages', 'template'],
        },
        handler: async (args = {}) => {
            const badTemplate = rejectInvalid(validateText(args.template, 'template', 256));
            if (badTemplate)
                return badTemplate;
            try {
                const mod = await loadRuvllmWasm();
                const messages = args.messages;
                const templateStr = args.template;
                // Known preset names are passed as-is; anything else is treated
                // as a model ID for auto-detection.
                const template = CHAT_TEMPLATE_PRESETS.includes(templateStr)
                    ? templateStr
                    : { modelId: templateStr };
                const formatted = await mod.formatChat(messages, template);
                return textResult(formatted);
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
    {
        name: 'ruvllm_generate_config',
        description: describeTool('Create a generation config (maxTokens, temperature, topP, etc.) as JSON.'),
        inputSchema: {
            type: 'object',
            properties: {
                maxTokens: { type: 'number', description: 'Max tokens to generate' },
                temperature: { type: 'number', description: 'Sampling temperature (note: f32 precision)' },
                topP: { type: 'number', description: 'Top-p sampling' },
                topK: { type: 'number', description: 'Top-k sampling' },
                repetitionPenalty: { type: 'number', description: 'Repetition penalty' },
                stopSequences: { type: 'array', items: { type: 'string' }, description: 'Stop sequences' },
            },
        },
        handler: async (args = {}) => {
            try {
                const mod = await loadRuvllmWasm();
                // The value from `createGenerateConfig` is emitted unchanged as
                // the text payload.
                const config = await mod.createGenerateConfig(args);
                return textResult(config);
            }
            catch (err) {
                return errorResult(String(err));
            }
        },
    },
];
//# sourceMappingURL=ruvllm-tools.js.map