UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

379 lines 14.3 kB
/**
 * RuVector LLM WASM Integration
 *
 * Wraps @ruvector/ruvllm-wasm for browser-native LLM inference utilities.
 * Provides HNSW routing, SONA instant adaptation, MicroLoRA fine-tuning,
 * chat template formatting, KV cache management, and inference arena.
 *
 * Published API (v2.0.2): RuvLLMWasm, ChatTemplateWasm, ChatMessageWasm,
 * GenerateConfig, HnswRouterWasm, SonaInstantWasm, MicroLoraWasm,
 * KvCacheWasm, BufferPoolWasm, InferenceArenaWasm.
 *
 * v2.0.2 fixes:
 * - HNSW supports 1000+ patterns (fixed connect_node ordering)
 * - MicroLoRA adapt() now takes (input: Float32Array, feedback: AdaptFeedbackWasm)
 *
 * Remaining quirks:
 * - Stats objects return WASM pointers — use .toJson() or named accessors
 * - GenerateConfig float precision loss (f32 roundtrip)
 * - MicroLoRA apply() hardcoded to 768 dims regardless of config
 *
 * @module @claude-flow/cli/ruvector/ruvllm-wasm
 */
import { readFileSync } from 'node:fs';
import { createRequire } from 'node:module';

// WASM binary requires at least 768-dim input for MicroLoRA adapt()
const MICROLORA_WASM_MIN_DIM = 768;

// Preset names accepted by formatChat(). Kept as a list (not an object map) so
// that inherited keys such as "toString" can never be mistaken for a preset.
const CHAT_TEMPLATE_PRESETS = Object.freeze(['llama3', 'mistral', 'chatml', 'phi', 'gemma']);

// ── WASM Module Detection & Init ─────────────────────────────

// Set once initSync() has succeeded; guards against re-initialisation.
let _wasmReady = false;

/**
 * Check if @ruvector/ruvllm-wasm is installed and loadable.
 *
 * @returns {Promise<boolean>} true when the package imports and exposes a
 *   `RuvLLMWasm` constructor; false on any import failure.
 */
export async function isRuvllmWasmAvailable() {
    try {
        const mod = await import('@ruvector/ruvllm-wasm');
        return typeof mod.RuvLLMWasm === 'function';
    }
    catch {
        // Any import failure simply means "not available".
        return false;
    }
}

/**
 * Initialize the WASM module for Node.js. Safe to call multiple times.
 * Uses initSync with object form: { module: bytes } (raw bytes deprecated).
 *
 * @returns {Promise<void>}
 * @throws {Error} "Failed to initialize @ruvector/ruvllm-wasm: …" when the
 *   package cannot be imported, the .wasm file cannot be resolved/read, or
 *   initSync throws. The underlying error is attached as `cause`.
 */
export async function initRuvllmWasm() {
    if (_wasmReady) {
        return;
    }
    try {
        const mod = await import('@ruvector/ruvllm-wasm');
        const require_ = createRequire(import.meta.url);
        const wasmPath = require_.resolve('@ruvector/ruvllm-wasm/ruvllm_wasm_bg.wasm');
        const wasmBytes = readFileSync(wasmPath);
        // MUST use object form — initSync(bytes) is deprecated
        mod.initSync({ module: wasmBytes });
        _wasmReady = true;
    }
    catch (err) {
        // Keep the original error (and its stack) reachable through `cause`.
        throw new Error(`Failed to initialize @ruvector/ruvllm-wasm: ${err}`, { cause: err });
    }
}

/**
 * Ensure the WASM module is initialised, then return its namespace object.
 * Private helper shared by every factory below.
 */
async function loadInitializedModule() {
    await initRuvllmWasm();
    return import('@ruvector/ruvllm-wasm');
}

/**
 * Copy each listed key from `source` to `target`, skipping keys whose value is
 * `undefined` so the target keeps its own default for them.
 */
function copyDefined(target, source, keys) {
    for (const key of keys) {
        const value = source[key];
        if (value !== undefined) {
            target[key] = value;
        }
    }
}

/**
 * Get ruvllm-wasm status.
 *
 * @returns {Promise<{available: boolean, initialized: boolean, version: (string|null)}>}
 *   `initialized` reflects whether initRuvllmWasm() has succeeded in this
 *   process; `version` is null when the package exposes no getVersion().
 */
export async function getRuvllmStatus() {
    const available = await isRuvllmWasmAvailable();
    if (!available) {
        return { available: false, initialized: false, version: null };
    }
    try {
        const mod = await import('@ruvector/ruvllm-wasm');
        // version is a standalone function, not on RuvLLMWasm class
        const version = typeof mod.getVersion === 'function' ? mod.getVersion() : null;
        return { available: true, initialized: _wasmReady, version };
    }
    catch {
        return { available: true, initialized: _wasmReady, version: null };
    }
}

// ── HNSW Router ──────────────────────────────────────────────

/**
 * Maximum safe pattern count for HNSW router.
 * v2.0.2 supports 1000+ patterns (fixed connect_node ordering).
 */
export const HNSW_MAX_SAFE_PATTERNS = 1024;

/**
 * Normalise one raw route() hit into { name, score, metadata }.
 * Accepts either field spelling (name/pattern_name, score/distance) and
 * parses metadata when it arrives as a JSON string.
 */
function normalizeRouteResult(r) {
    let metadata;
    if (r.metadata) {
        metadata = typeof r.metadata === 'string' ? JSON.parse(r.metadata) : r.metadata;
    }
    return {
        name: r.name ?? r.pattern_name ?? '',
        score: r.score ?? r.distance ?? 0,
        metadata,
    };
}

/**
 * Create a WASM HNSW router for semantic routing.
 * Returns an object with add/route/clear methods.
 *
 * Enforces HNSW_MAX_SAFE_PATTERNS limit (1024 in v2.0.2).
 *
 * @param {{dimensions: number, maxPatterns: number, efSearch?: number}} config
 *   `dimensions` and `maxPatterns` are passed to the HnswRouterWasm
 *   constructor; a truthy `efSearch` is applied via setEfSearch().
 * @returns {Promise<object>} wrapper exposing addPattern, route, clear,
 *   patternCount and toJson.
 * @throws {Error} when WASM initialisation fails.
 */
export async function createHnswRouter(config) {
    const mod = await loadInitializedModule();
    const router = new mod.HnswRouterWasm(config.dimensions, config.maxPatterns);
    if (config.efSearch) {
        router.setEfSearch(config.efSearch);
    }
    // Tracked on the JS side; only successful additions are counted.
    let count = 0;
    return {
        addPattern(pattern) {
            if (count >= HNSW_MAX_SAFE_PATTERNS) {
                throw new Error(`HNSW pattern limit reached (${HNSW_MAX_SAFE_PATTERNS}).`);
            }
            const metadataJson = JSON.stringify(pattern.metadata ?? {});
            // addPattern requires 3 args: (embedding, name, metadata_json)
            const ok = router.addPattern(pattern.embedding, pattern.name, metadataJson);
            if (ok) {
                count++;
            }
            return ok;
        },
        route(query, k = 3) {
            const raw = router.route(query, k);
            return Array.from(raw).map((r) => normalizeRouteResult(r));
        },
        clear() {
            router.clear();
            count = 0;
        },
        patternCount() {
            return count;
        },
        toJson() {
            return router.toJson();
        },
    };
}

// ── SONA Instant Adaptation ──────────────────────────────────

/**
 * Create a SONA instant adaptation loop (<1ms adaptation).
 * v2.0.1: requires SonaConfigWasm object, not raw number.
 *
 * @param {object} [config] optional overrides; any of hiddenDim,
 *   learningRate, emaDecay, ewcLambda, microLoraRank, patternCapacity.
 *   Keys left undefined keep the SonaConfigWasm default.
 * @returns {Promise<object>} wrapper exposing adapt, recordPattern,
 *   suggestAction, stats, reset and toJson.
 * @throws {Error} when WASM initialisation fails.
 */
export async function createSonaInstant(config = {}) {
    const mod = await loadInitializedModule();
    const sonaConfig = new mod.SonaConfigWasm();
    copyDefined(sonaConfig, config, [
        'hiddenDim',
        'learningRate',
        'emaDecay',
        'ewcLambda',
        'microLoraRank',
        'patternCapacity',
    ]);
    const sona = new mod.SonaInstantWasm(sonaConfig);
    return {
        adapt(quality) {
            sona.instantAdapt(quality);
        },
        recordPattern(embedding, success) {
            sona.recordPattern(embedding, success);
        },
        suggestAction(context) {
            return sona.suggestAction(context);
        },
        stats() {
            return sona.toJson();
        },
        reset() {
            sona.reset();
        },
        toJson() {
            return sona.toJson();
        },
    };
}

// ── MicroLoRA ────────────────────────────────────────────────

/**
 * Create a MicroLoRA adapter (ultra-lightweight LoRA, ranks 1-4).
 * v2.0.2: .adapt(input, feedback) takes Float32Array + AdaptFeedbackWasm.
 *
 * @param {{inputDim: number, outputDim: number, rank?: number, alpha?: number}} config
 *   `rank` defaults to 2 and `alpha` to 1.0.
 * @returns {Promise<object>} wrapper exposing apply, adapt, applyUpdates,
 *   stats, reset, toJson and pendingUpdates.
 * @throws {Error} when WASM initialisation fails.
 */
export async function createMicroLora(config) {
    const mod = await loadInitializedModule();
    const loraConfig = new mod.MicroLoraConfigWasm();
    loraConfig.inputDim = config.inputDim;
    loraConfig.outputDim = config.outputDim;
    loraConfig.rank = config.rank ?? 2;
    loraConfig.alpha = config.alpha ?? 1.0;
    const lora = new mod.MicroLoraWasm(loraConfig);
    return {
        apply(input) {
            return lora.apply(input);
        },
        adapt(quality, learningRate = 0.01, success = true) {
            const feedback = new mod.AdaptFeedbackWasm();
            feedback.quality = quality;
            feedback.learningRate = learningRate;
            try {
                feedback.success = success;
            }
            catch { /* v2.0.2 quirk */ }
            // Zero-filled placeholder input, padded up to the WASM minimum width.
            const input = new Float32Array(Math.max(config.inputDim, MICROLORA_WASM_MIN_DIM));
            lora.adapt(input, feedback);
            // Flush accumulated gradients. HONEST CAVEAT (audit
            // docs/reviews/intelligence-system-audit-2026-05-29.md): even WITH this
            // flush, the shipped @ruvector/ruvllm-wasm@2.0.2 MicroLoraWasm.apply()
            // output is empirically UNCHANGED (measured maxAbsDelta = 0 after 200
            // adapts). MicroLoRA adaptation is therefore effectively a no-op on
            // inference with this WASM backend. We do NOT synthesize a fake gradient
            // from the scalar quality to make output "move" — that would be a
            // fabricated signal. Real adaptation needs the WASM backend to flush B,
            // or a caller supplying real gradients.
            lora.applyUpdates(learningRate);
        },
        applyUpdates(learningRate = 0.01) {
            // WASM runtime signature is applyUpdates(learning_rate: number); the
            // shipped .d.ts mistypes it as Float32Array. Plain JS needs no cast,
            // so the number is passed through as-is.
            lora.applyUpdates(learningRate);
        },
        stats() {
            return lora.toJson();
        },
        reset() {
            lora.reset();
        },
        toJson() {
            return lora.toJson();
        },
        pendingUpdates() {
            return lora.pendingUpdates();
        },
    };
}

// ── Chat Templates ───────────────────────────────────────────

/**
 * Validate a formatChat() template argument without touching WASM.
 * Returns a { kind, value } descriptor or throws on unsupported input.
 */
function resolveChatTemplateSpec(template) {
    if (typeof template === 'string') {
        if (!CHAT_TEMPLATE_PRESETS.includes(template)) {
            throw new Error(`Unknown template preset: ${template}. Use: ${CHAT_TEMPLATE_PRESETS.join(', ')}`);
        }
        return { kind: 'preset', value: template };
    }
    if (template !== null && typeof template === 'object') {
        if ('custom' in template) {
            return { kind: 'custom', value: template.custom };
        }
        if ('modelId' in template) {
            return { kind: 'modelId', value: template.modelId };
        }
    }
    throw new TypeError('Invalid chat template: expected a preset name, { custom }, or { modelId }');
}

/**
 * Format chat messages using a chat template.
 * Supports presets (llama3, mistral, chatml, phi, gemma) and auto-detection.
 *
 * The template argument is validated before the WASM module is loaded, so a
 * bad template is reported even when the WASM package is unavailable.
 *
 * @param {Array<{role: string, content: string}>} messages roles must be
 *   'system', 'user' or 'assistant'.
 * @param {string|{custom: string}|{modelId: string}} template preset name,
 *   a custom template, or a model id to detect the template from.
 * @returns {Promise<*>} whatever ChatTemplateWasm#format returns.
 * @throws {Error} on an unknown preset name or unknown message role, or when
 *   WASM initialisation fails.
 * @throws {TypeError} when `template` is neither a string nor an object
 *   carrying `custom` or `modelId`.
 */
export async function formatChat(messages, template) {
    const spec = resolveChatTemplateSpec(template);
    const mod = await loadInitializedModule();
    // Build template
    let tmpl;
    if (spec.kind === 'preset') {
        // Invoked as a method so the static factory keeps its `this`.
        tmpl = mod.ChatTemplateWasm[spec.value]();
    }
    else if (spec.kind === 'custom') {
        tmpl = mod.ChatTemplateWasm.custom(spec.value);
    }
    else {
        tmpl = mod.ChatTemplateWasm.detectFromModelId(spec.value);
    }
    // Build messages
    const wasmMessages = messages.map((m) => {
        switch (m.role) {
            case 'system':
                return mod.ChatMessageWasm.system(m.content);
            case 'user':
                return mod.ChatMessageWasm.user(m.content);
            case 'assistant':
                return mod.ChatMessageWasm.assistant(m.content);
            default:
                throw new Error(`Unknown role: ${m.role}`);
        }
    });
    return tmpl.format(wasmMessages);
}

// ── KV Cache ─────────────────────────────────────────────────

/**
 * Create a KV cache for token management.
 *
 * @param {object} [opts] optional overrides; any of tailLength, maxTokens,
 *   numKvHeads, headDim. When omitted, KvCacheWasm.withDefaults() is used.
 * @returns {Promise<object>} wrapper exposing append, stats, clear and
 *   tokenCount. stats() returns a JSON string ('{}' if stats cannot be read).
 * @throws {Error} when WASM initialisation fails.
 */
export async function createKvCache(opts) {
    const mod = await loadInitializedModule();
    let cache;
    if (opts) {
        const config = new mod.KvCacheConfigWasm();
        copyDefined(config, opts, ['tailLength', 'maxTokens', 'numKvHeads', 'headDim']);
        cache = new mod.KvCacheWasm(config);
    }
    else {
        cache = mod.KvCacheWasm.withDefaults();
    }
    return {
        append(keys, values) {
            cache.append(keys, values);
        },
        stats() {
            // Use toJson if available, otherwise stringify. Stats objects are WASM
            // pointers, so stringifying them directly would not expose the fields.
            try {
                const raw = cache.stats();
                if (raw != null && typeof raw.toJson === 'function') {
                    const json = raw.toJson();
                    return typeof json === 'string' ? json : JSON.stringify(json);
                }
                return JSON.stringify(raw);
            }
            catch {
                // Best-effort: stats are diagnostic only, never fail the caller.
                return '{}';
            }
        },
        clear() {
            cache.clear();
        },
        tokenCount() {
            return cache.tokenCount;
        },
    };
}

// ── Generate Config ──────────────────────────────────────────

/**
 * Create a generation config object.
 * Note: f32 precision loss is expected (0.7 → 0.699999...).
 *
 * @param {object} [opts] optional overrides; any of maxTokens, temperature,
 *   topP, topK, repetitionPenalty, plus `stopSequences` (iterable of strings).
 * @returns {Promise<*>} the result of GenerateConfig#toJson().
 * @throws {Error} when WASM initialisation fails.
 */
export async function createGenerateConfig(opts = {}) {
    const mod = await loadInitializedModule();
    const config = new mod.GenerateConfig();
    copyDefined(config, opts, ['maxTokens', 'temperature', 'topP', 'topK', 'repetitionPenalty']);
    for (const seq of opts.stopSequences ?? []) {
        config.addStopSequence(seq);
    }
    return config.toJson();
}

// ── Buffer Pool ──────────────────────────────────────────────

/**
 * Create a buffer pool for inference memory management.
 *
 * @param {number} capacity forwarded to BufferPoolWasm.withCapacity().
 * @returns {Promise<object>} wrapper exposing prewarm, stats, hitRate and clear.
 * @throws {Error} when WASM initialisation fails.
 */
export async function createBufferPool(capacity) {
    const mod = await loadInitializedModule();
    const pool = mod.BufferPoolWasm.withCapacity(capacity);
    return {
        prewarm(count) {
            pool.prewarmAll(count);
        },
        stats() {
            return pool.statsJson();
        },
        hitRate() {
            return pool.hitRate;
        },
        clear() {
            pool.clear();
        },
    };
}

// ── Inference Arena ──────────────────────────────────────────

/**
 * Create an inference arena for model memory allocation.
 *
 * @param {{capacity: number}|{hiddenDim: number, vocabSize: number, batchSize: number}} opts
 *   with `capacity` the arena is constructed directly; otherwise it is built
 *   via InferenceArenaWasm.forModel(hiddenDim, vocabSize, batchSize).
 * @returns {Promise<object>} wrapper exposing reset, used, capacity and remaining.
 * @throws {Error} when WASM initialisation fails.
 */
export async function createInferenceArena(opts) {
    const mod = await loadInitializedModule();
    const arena = 'capacity' in opts
        ? new mod.InferenceArenaWasm(opts.capacity)
        : mod.InferenceArenaWasm.forModel(opts.hiddenDim, opts.vocabSize, opts.batchSize);
    return {
        reset() {
            arena.reset();
        },
        used() {
            return arena.used;
        },
        capacity() {
            return arena.capacity;
        },
        remaining() {
            return arena.remaining;
        },
    };
}
//# sourceMappingURL=ruvllm-wasm.js.map