UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

292 lines 12.5 kB
/** * ruvLLM Bridge -- Local Language Model Inference from RuVector * * Extends @ruvector/core with on-device GGUF model inference. * Provides 3-tier routing: * Tier 1: Agent Booster (WASM, <1ms) -- simple transforms * Tier 2: Local model via ruvLLM (~200ms) -- routing, classification * Tier 3: Cloud API (2-5s) -- complex reasoning * * All @ruvector/* packages are optional peer dependencies. * The bridge degrades gracefully when they are absent. * * @module @claude-flow/cli/appliance/ruvllm-bridge */ import { readdir, stat } from 'node:fs/promises'; import { join, extname, basename } from 'node:path'; const DEFAULT_CONFIG = { modelsDir: './models', defaultModel: '', maxTokens: 512, temperature: 0.7, contextSize: 4096, kvCachePath: '', verbose: false, }; // ── Quantization / parameter heuristics ───────────────────── const QUANT_PATTERNS = [ [/q4_k_m/i, 'q4_k_m'], [/q4_k_s/i, 'q4_k_s'], [/q4_0/i, 'q4_0'], [/q5_k_m/i, 'q5_k_m'], [/q5_0/i, 'q5_0'], [/q8_0/i, 'q8_0'], [/f16/i, 'f16'], [/f32/i, 'f32'], ]; function inferQuantization(filename) { for (const [re, label] of QUANT_PATTERNS) if (re.test(filename)) return label; return 'unknown'; } function inferParameters(filename) { const m = filename.match(/(\d+)[._-]?b/i); return m ? m[0].toUpperCase().replace(/[._-]/g, '') : 'unknown'; } // ── Complexity heuristic ──────────────────────────────────── const HIGH = new Set([ 'architect', 'design', 'refactor', 'security', 'audit', 'complex', 'analyze', 'distributed', 'concurrent', 'algorithm', 'investigate', 'optimize', 'debug', 'system', 'integration', ]); const LOW = new Set([ 'rename', 'typo', 'format', 'comment', 'version', 'bump', 'move', 'copy', 'delete', 'simple', 'config', ]); function estimateComplexity(desc) { const words = desc.toLowerCase().split(/\s+/); let score = 0.3; for (const w of words) { if (HIGH.has(w)) score += 0.15; if (LOW.has(w)) score -= 0.1; } return Math.max(0, Math.min(1, score + Math.min(0.2, words.length / 200))); } // ── Bridge ────────────────────────────────────────────────── export class RuvllmBridge { config; models = new Map(); activeModel = null; kvCacheEntries = 0; ruvectorCore = null; ruvectorRouter = null; ruvectorSona = null; ggufEngine = null; constructor(config) { if (!config.modelsDir) throw new Error('RuvllmConfig.modelsDir is required'); this.config = { ...DEFAULT_CONFIG, ...config }; } /** Probe optional @ruvector packages, initialize GGUF engine, and scan modelsDir. */ async initialize() { this.ruvectorCore = await this.tryImport('@ruvector/core'); this.ruvectorRouter = await this.tryImport('@ruvector/router'); this.ruvectorSona = await this.tryImport('@ruvector/sona'); // Initialize GGUF engine for local model inference try { const { GgufEngine } = await import('./gguf-engine.js'); this.ggufEngine = new GgufEngine({ contextSize: this.config.contextSize, maxTokens: this.config.maxTokens, temperature: this.config.temperature, kvCachePath: this.config.kvCachePath, verbose: this.config.verbose, }); await this.ggufEngine.initialize(); } catch { // GGUF engine is optional } await this.scanModelsDir(); if (this.config.verbose) { const pkgs = [ this.ruvectorCore && '@ruvector/core', this.ruvectorRouter && '@ruvector/router', this.ruvectorSona && '@ruvector/sona', this.ggufEngine && 'gguf-engine', ].filter(Boolean); if (pkgs.length) console.log(`[ruvLLM] Loaded: ${pkgs.join(', ')}`); console.log(`[ruvLLM] ${this.models.size} model(s) in ${this.config.modelsDir}`); } } /** Return all discovered GGUF models. */ async listModels() { return Array.from(this.models.values()); } /** Load a model into memory (delegates to GGUF engine or @ruvector/core). */ async loadModel(name) { const info = this.models.get(name); if (!info) throw new Error(`Model "${name}" not found. Available: ${[...this.models.keys()].join(', ')}`); // Prefer GGUF engine (parses header, loads via node-llama-cpp if available) if (this.ggufEngine) { const meta = await this.ggufEngine.loadModel(info.path); if (meta.architecture) info.parameters = meta.architecture; if (meta.quantization) info.quantization = meta.quantization; } else if (this.ruvectorCore?.loadModel) { await this.ruvectorCore.loadModel(info.path, { contextSize: this.config.contextSize }); } info.loaded = true; this.activeModel = name; } /** * Generate text from a prompt. Routes through tiers: * 1. Agent Booster (trivial transforms, no LLM). * 2. Local GGUF model via @ruvector/core. * 3. Cloud fallback (empty response -- caller handles upstream). */ async generate(request) { const start = performance.now(); const modelName = request.model ?? this.config.defaultModel ?? this.activeModel ?? ''; // Tier 1: Agent Booster const booster = this.tryAgentBooster(request.prompt); if (booster !== null) { return { text: booster, model: 'agent-booster', tokensUsed: 0, latencyMs: performance.now() - start, tier: 1, cached: false }; } // Tier 2: Local model (GGUF engine preferred, then @ruvector/core) const info = this.models.get(modelName); if (info?.loaded) { try { if (this.ggufEngine) { const r = await this.ggufEngine.generate({ prompt: request.prompt, maxTokens: request.maxTokens ?? this.config.maxTokens, temperature: request.temperature ?? this.config.temperature, stopSequences: request.stopSequences, }); return { text: r.text, model: modelName, tokensUsed: r.tokensUsed, latencyMs: performance.now() - start, tier: 2, cached: false }; } else if (this.ruvectorCore?.generate) { const r = await this.ruvectorCore.generate({ model: info.path, prompt: request.prompt, maxTokens: request.maxTokens ?? this.config.maxTokens, temperature: request.temperature ?? this.config.temperature, stopSequences: request.stopSequences, }); return { text: r.text ?? '', model: modelName, tokensUsed: r.tokensUsed ?? 0, latencyMs: performance.now() - start, tier: 2, cached: false }; } } catch (err) { if (this.config.verbose) console.warn('[ruvLLM] Local generation failed, tier 3 fallback:', err); } } // Tier 3: Cloud fallback return { text: '', model: 'cloud-fallback', tokensUsed: 0, latencyMs: performance.now() - start, tier: 3, cached: false }; } /** Route a task description to the optimal tier. Uses @ruvector/router when available. */ async routeTask(description) { if (this.ruvectorRouter?.route) { try { const r = await this.ruvectorRouter.route(description); return { tier: r.tier ?? 3, model: r.model ?? 'cloud', confidence: r.confidence ?? 0.5 }; } catch { /* fall through */ } } const complexity = estimateComplexity(description); const words = description.split(/\s+/).length; if (words < 15 && complexity < 0.25) return { tier: 1, model: 'agent-booster', confidence: 0.9 }; if (complexity < 0.55 && this.activeModel) return { tier: 2, model: this.activeModel, confidence: 0.7 }; return { tier: 3, model: 'cloud', confidence: 0.6 }; } /** Return current bridge status. */ async getStatus() { return { available: this.models.size > 0 || this.ruvectorCore !== null, ruvectorCore: this.ruvectorCore !== null, ruvectorRouter: this.ruvectorRouter !== null, ruvectorSona: this.ruvectorSona !== null, modelsLoaded: [...this.models.values()].filter((m) => m.loaded).map((m) => m.name), kvCacheSize: this.kvCacheEntries, }; } /** Persist KV-cache, unload models, and clean up. */ async shutdown() { if (this.ggufEngine) { await this.ggufEngine.shutdown(); this.ggufEngine = null; } if (this.config.kvCachePath && this.ruvectorCore?.persistKvCache) { try { await this.ruvectorCore.persistKvCache(this.config.kvCachePath); } catch (e) { if (this.config.verbose) console.warn('[ruvLLM] KV-cache persist failed:', e); } } if (this.ruvectorCore?.unloadAll) await this.ruvectorCore.unloadAll(); for (const info of this.models.values()) info.loaded = false; this.activeModel = null; this.kvCacheEntries = 0; } // ── Private ─────────────────────────────────────────────── async scanModelsDir() { try { const entries = await readdir(this.config.modelsDir); for (const entry of entries) { if (extname(entry).toLowerCase() !== '.gguf') continue; const fullPath = join(this.config.modelsDir, entry); const s = await stat(fullPath); if (!s.isFile()) continue; const name = basename(entry, '.gguf'); this.models.set(name, { name, path: fullPath, format: 'gguf', quantization: inferQuantization(entry), size: s.size, parameters: inferParameters(entry), loaded: false, }); } } catch { // modelsDir may not exist -- tier 1 and tier 3 still work } } async tryImport(pkg) { try { return await import(pkg); } catch { return null; } } /** Tier-1 Agent Booster: handle trivial transforms without any LLM. */ tryAgentBooster(prompt) { const t = prompt.trim(); if (t.length > 200) return null; if (/^(convert|change)\s+(var|let)\s+to\s+const$/i.test(t)) { return 'Use the Edit tool to replace `var`/`let` declarations with `const`.'; } if (/^remove\s+console\.(log|warn|error|debug|info)$/i.test(t)) { const m = t.toLowerCase().match(/console\.(\w+)/)?.[1] ?? 'log'; return `Use the Edit tool to remove all \`console.${m}\` calls.`; } return null; } } // ── Singleton accessor ────────────────────────────────────── let instance = null; /** Get or create the singleton RuvllmBridge. Config required on first call. */ export function getRuvllmBridge(config) { if (!instance && config) instance = new RuvllmBridge(config); if (!instance) throw new Error('ruvLLM bridge not initialized. Call with config first.'); return instance; } /** Reset the singleton (useful for tests). */ export function resetRuvllmBridge() { instance = null; } /** Check whether @ruvector/core is importable without loading the bridge. */ export async function isRuvllmAvailable() { try { await import('@ruvector/core'); return true; } catch { return false; } } //# sourceMappingURL=ruvllm-bridge.js.map