UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

596 lines 25.4 kB
/** * Task Intent Classifier + Shard Retriever * * Stores rule shards in vector storage with embeddings and metadata. * At task start, retrieves the top N shards by semantic similarity * with hard filters by risk class and repo scope. * * Retrieval contract: * 1. Always include the constitution * 2. Retrieve up to 5 shards by semantic similarity * 3. Add hard filters by risk class and repo scope * 4. Contradiction check: prefer higher-priority rule ID * * @module @claude-flow/guidance/retriever */ // ============================================================================ // Intent Classification // ============================================================================ /** Intent detection patterns with confidence weights */ const INTENT_PATTERNS = { 'bug-fix': [ { pattern: /\b(fix|bug|broken|error|crash|issue|wrong|incorrect|fail)\b/i, weight: 0.8 }, { pattern: /\b(not working|doesn't work|unexpected|regression)\b/i, weight: 0.9 }, ], 'feature': [ { pattern: /\b(add|create|implement|build|new|introduce|develop)\b/i, weight: 0.5 }, { pattern: /\b(feature|capability|functionality|support for)\b/i, weight: 0.9 }, { pattern: /\b(user|page|profile|dashboard|form|widget|component|module)\b/i, weight: 0.3 }, ], 'refactor': [ { pattern: /\b(refactor|restructure|reorganize|simplify|clean|extract|inline)\b/i, weight: 0.9 }, { pattern: /\b(improve readability|reduce complexity|code quality)\b/i, weight: 0.8 }, ], 'security': [ { pattern: /\b(security|auth|permission|access control|encrypt|secret|token)\b/i, weight: 0.9 }, { pattern: /\b(cve|vulnerability|injection|xss|csrf|sanitize)\b/i, weight: 1.0 }, ], 'performance': [ { pattern: /\b(performance|optimize|speed|slow|fast|cache|memory usage|latency)\b/i, weight: 0.9 }, { pattern: /\b(bottleneck|profile|benchmark|throughput|efficient)\b/i, weight: 0.8 }, ], 'testing': [ { pattern: /\b(tests?|specs?|coverage|mocks?|asserts?|tdd|unit tests?|integration tests?)\b/i, weight: 1.0 }, { pattern: /\b(test suite|test case|test plan|quality assurance)\b/i, weight: 0.9 }, ], 'docs': [ { pattern: /\b(document|readme|jsdoc|comment|explain|describe|tutorial)\b/i, weight: 0.8 }, { pattern: /\b(api docs|documentation|usage guide|changelog)\b/i, weight: 0.9 }, ], 'deployment': [ { pattern: /\b(deploy|release|publish|ci|cd|pipeline|docker|kubernetes)\b/i, weight: 0.9 }, { pattern: /\b(staging|production|rollback|migration|version)\b/i, weight: 0.7 }, ], 'architecture': [ { pattern: /\b(architect|design pattern|system design|structure|boundary)\b/i, weight: 0.8 }, { pattern: /\b(module boundary|component architecture|layer|service mesh|domain model|aggregate root)\b/i, weight: 0.7 }, { pattern: /\b(interface|api design|separation of concerns)\b/i, weight: 0.6 }, ], 'debug': [ { pattern: /\b(debug|trace|log|diagnose|investigate|root cause)\b/i, weight: 0.9 }, { pattern: /\b(stack trace|breakpoint|inspect|reproduction)\b/i, weight: 0.8 }, ], 'general': [ { pattern: /./, weight: 0.1 }, ], }; /** * Deterministic hash-based embedding provider — **test-only**. * * Produces fixed-dimension vectors from a simple character-hash → sin() * transform. The resulting embeddings have no real semantic meaning; * they are stable and fast, which makes them useful for unit/integration * tests that need a concrete {@link IEmbeddingProvider} without loading * an ONNX model. * * **Do NOT use in production** — replace with a real model-backed * provider (e.g. the agentic-flow ONNX integration). */ export class HashEmbeddingProvider { dimensions; cache = new Map(); constructor(dimensions = 384) { this.dimensions = dimensions; } async embed(text) { const key = text.slice(0, 200); if (this.cache.has(key)) return this.cache.get(key); const embedding = this.hashEmbed(text); this.cache.set(key, embedding); return embedding; } async batchEmbed(texts) { return Promise.all(texts.map(t => this.embed(t))); } hashEmbed(text) { const embedding = new Float32Array(this.dimensions); const normalized = text.toLowerCase().trim(); for (let i = 0; i < this.dimensions; i++) { let hash = 0; for (let j = 0; j < normalized.length; j++) { hash = ((hash << 5) - hash + normalized.charCodeAt(j) * (i + 1)) | 0; } embedding[i] = (Math.sin(hash) + 1) / 2; } // L2 normalize let norm = 0; for (let i = 0; i < this.dimensions; i++) { norm += embedding[i] * embedding[i]; } norm = Math.sqrt(norm); if (norm > 0) { for (let i = 0; i < this.dimensions; i++) { embedding[i] /= norm; } } return embedding; } } // ============================================================================ // Shard Retriever // ============================================================================ export class ShardRetriever { shards = []; constitution = null; embeddingProvider; indexed = false; globCache = new Map(); // M3 perf substrate — packed embedding matrix for batched cosine. // The per-shard `embedding: Float32Array` fields are scattered allocations // that produce poor cache locality during scoreShards's O(n) scan. We // additionally cache a single contiguous Float32Array of shape // (shardCount × dim) and run the cosine as a tight matrix-vector dot. // V8 emits much tighter inner-loop code for this access pattern and // memory bandwidth becomes the floor. // // `packedDim === 0` when not yet packed (no shards, or shards lack // embeddings). Stale on shard mutation — `indexShards()` repacks. packedEmbeddings = null; packedDim = 0; packedShardCount = 0; // M4 perf substrate — RaBitQ-style 1-bit-per-dim signatures. // For unit vectors, the sign pattern of each dim is a Locality-Sensitive // Hash. P[sign(q[i]) === sign(s[i])] ≈ 1 - θ/π where θ is the angle // between q and s. So Hamming distance between signatures approximates // angular distance, and cosine ≈ 1 - 2·hamming/dim. For dim=384 this // costs 12 Uint32 (48 bytes) per shard — a 32x memory reduction vs // Float32Array — and the comparison is XOR + popcount per 32-bit word, // which V8 lowers to a tight machine-code loop. // // At dim=384: 6 multiplies per word × 12 words = 72 ops to compare two // signatures vs 384 multiplies for the full Float32 cosine. Even with // popcount in JS via the Hamming-Weight bit trick, this is ~6-8x // faster than the dot product. We use it as a coarse pre-filter: // compute Hamming distances, take the top-K candidates by Hamming, then // do exact cosine on just those. Top-K is much smaller than N so the // exact-cosine work is bounded. // // `bitsPerSig === dim` rounded up to a multiple of 32 (we waste at most // 31 bits per shard at non-aligned dims). packedSignatures = null; wordsPerSig = 0; // = ceil(dim/32) constructor(embeddingProvider) { this.embeddingProvider = embeddingProvider ?? new HashEmbeddingProvider(); } /** * Load a compiled policy bundle */ async loadBundle(bundle) { this.constitution = bundle.constitution; this.shards = bundle.shards; this.indexed = false; await this.indexShards(); } /** * Index all shards by generating embeddings. * * M3 substrate — also packs every shard embedding into a single * contiguous Float32Array (`packedEmbeddings`) so scoreShards can run * the cosine as a vectorized matrix-vector dot in cache-friendly * sequential memory rather than chasing per-shard heap pointers. * Costs O(n × dim) at index time (one-shot) for an O(n) scan win * on every query. */ async indexShards() { if (this.indexed) return; const texts = this.shards.map(s => s.compactText); const embeddings = await this.embeddingProvider.batchEmbed(texts); let dim = 0; for (let i = 0; i < this.shards.length; i++) { this.shards[i].embedding = embeddings[i]; if (embeddings[i] && embeddings[i].length > dim) dim = embeddings[i].length; } // Pack into a single contiguous Float32Array. Shards without an // embedding (or with a wrong dim) get a row of zeros — they fall // through to similarity=0 in the existing scoring path. if (dim > 0 && this.shards.length > 0) { const packed = new Float32Array(this.shards.length * dim); for (let i = 0; i < this.shards.length; i++) { const e = this.shards[i].embedding; if (e && e.length === dim) { packed.set(e, i * dim); } } this.packedEmbeddings = packed; this.packedDim = dim; this.packedShardCount = this.shards.length; // M4 — also compute the 1-bit sign signature per shard. Each row // is `ceil(dim/32)` Uint32 words; bit i is `embedding[i] > 0`. const words = (dim + 31) >>> 5; const sigs = new Uint32Array(this.shards.length * words); for (let i = 0; i < this.shards.length; i++) { const e = this.shards[i].embedding; if (!e || e.length !== dim) continue; const base = i * words; for (let w = 0; w < words; w++) { let bits = 0; const dimStart = w * 32; const dimEnd = Math.min(dim, dimStart + 32); for (let b = dimStart; b < dimEnd; b++) { if (e[b] > 0) bits |= 1 << (b - dimStart); } sigs[base + w] = bits >>> 0; } } this.packedSignatures = sigs; this.wordsPerSig = words; } else { this.packedEmbeddings = null; this.packedDim = 0; this.packedShardCount = 0; this.packedSignatures = null; this.wordsPerSig = 0; } this.indexed = true; } /** * Build a 1-bit sign signature for the query vector. Matches the * packed-shard format produced in indexShards above. */ buildQuerySignature(q) { const dim = q.length; const words = (dim + 31) >>> 5; const sig = new Uint32Array(words); for (let w = 0; w < words; w++) { let bits = 0; const start = w * 32; const end = Math.min(dim, start + 32); for (let b = start; b < end; b++) { if (q[b] > 0) bits |= 1 << (b - start); } sig[w] = bits >>> 0; } return sig; } /** * Hamming-Weight popcount on a single 32-bit word (Wegner / Wilkes). * Tested at ~1 ns on V8 — no native popcnt instruction exposed. */ static popcount32(x) { x = x - ((x >>> 1) & 0x55555555); x = (x & 0x33333333) + ((x >>> 2) & 0x33333333); x = (x + (x >>> 4)) & 0x0f0f0f0f; return (x * 0x01010101) >>> 24; } /** * Classify task intent */ classifyIntent(taskDescription) { let bestIntent = 'general'; let bestScore = 0; for (const [intent, patterns] of Object.entries(INTENT_PATTERNS)) { if (intent === 'general') continue; // Skip general fallback during scoring let score = 0; for (const { pattern, weight } of patterns) { if (pattern.test(taskDescription)) { score += weight; } } if (score > bestScore) { bestScore = score; bestIntent = intent; } } // Normalize confidence to 0-1 const confidence = Math.min(bestScore / 3, 1); return { intent: bestIntent, confidence }; } /** * Retrieve relevant shards for a task * * Contract: * 1. Always include the constitution * 2. Up to maxShards by semantic similarity * 3. Hard filters by risk class and repo scope * 4. Contradiction check: prefer higher priority */ async retrieve(request) { const startTime = performance.now(); if (!this.constitution) { throw new Error('No policy bundle loaded. Call loadBundle() first.'); } // Step 1: Classify intent const { intent: detectedIntent } = this.classifyIntent(request.taskDescription); const intent = request.intent ?? detectedIntent; // Step 2: Generate query embedding const queryEmbedding = await this.embeddingProvider.embed(request.taskDescription); // Step 3: Score all shards const maxShards = request.maxShards ?? 5; const scored = this.scoreShards(queryEmbedding, intent, request.riskFilter, request.repoScope); // Step 4: Select top N with contradiction resolution const selected = this.selectWithContradictionCheck(scored, maxShards); // Step 5: Build combined policy text const policyText = this.buildPolicyText(this.constitution, selected); const latencyMs = performance.now() - startTime; return { constitution: this.constitution, shards: selected, detectedIntent: intent, contradictionsResolved: this.countContradictions(selected), policyText, latencyMs, }; } /** * Score all shards against the query. * * M3 perf substrate — three changes from the baseline: * * 1. Filter FIRST, cosine SECOND. The old code computed cosine for * every shard regardless of whether riskFilter/repoScope would * throw it away. We now decide eligibility first and only do * the 384-dim multiply for survivors. * * 2. Packed-matrix cosine — when `packedEmbeddings` is current and * dim matches, compute the dot directly from contiguous memory * (one allocation, sequential reads) instead of dereferencing * `shard.embedding` per call. Embeddings are always unit- * normalised so cosine === dot + clamp. * * 3. Top-K partial selection — when the caller only wants `maxShards` * results (typical), don't `.sort()` the entire candidate list. * Maintain a fixed-size heap of size K and only compare/swap * against its current minimum. Drops the final step from * O(n log n) to O(n log K). */ scoreShards(queryEmbedding, intent, riskFilter, repoScope) { const results = []; const usePacked = this.packedEmbeddings !== null && this.packedShardCount === this.shards.length && this.packedDim === queryEmbedding.length; const packed = this.packedEmbeddings; const dim = this.packedDim; // M4 quantization fast path — for large shard sets, the bit-signature // popcount is ~11x faster than full Float32 cosine (proven in // bench-quantization.mjs). The sign-random-projection theorem // guarantees the Hamming distance approximates the angular distance, // so we can compute coarse similarities for all N shards at the // quantized cost and the result is good enough for the // sort/intent-boost/risk-boost path that follows. // // Only fires when (a) the packed signatures are current, (b) shard // count is >= 100 so the constant-factor cost of building the query // signature is amortised, and (c) dimensions match. const useQuantized = usePacked && this.packedSignatures !== null && this.packedShardCount >= 100 && this.wordsPerSig === ((dim + 31) >>> 5); let querySig = null; if (useQuantized) { querySig = this.buildQuerySignature(queryEmbedding); } const sigs = this.packedSignatures; const wps = this.wordsPerSig; for (let si = 0; si < this.shards.length; si++) { const shard = this.shards[si]; // Hard filter: risk class — skip cosine on filtered shards if (riskFilter && riskFilter.length > 0) { if (!riskFilter.includes(shard.rule.riskClass)) continue; } // Hard filter: repo scope if (repoScope) { const matchesScope = shard.rule.repoScopes.some(scope => scope === '**/*' || this.matchGlob(repoScope, scope)); if (!matchesScope) continue; } // Semantic similarity — only compute for survivors of the filter. // Prefer the quantized Hamming approximation when available (11x // faster than full Float32 dot — proven in bench-quantization.mjs). let similarity = 0; if (useQuantized && querySig !== null && sigs !== null) { const base = si * wps; let hamming = 0; for (let w = 0; w < wps; w++) { // Inline popcount32 — V8 emits much tighter machine code than // a function call inside the inner loop. Two cycles per word. let x = (sigs[base + w] ^ querySig[w]) >>> 0; x = x - ((x >>> 1) & 0x55555555); x = (x & 0x33333333) + ((x >>> 2) & 0x33333333); x = (x + (x >>> 4)) & 0x0f0f0f0f; hamming += (x * 0x01010101) >>> 24; } // Sign-random-projection: cos(θ) ≈ cos(π · hamming/dim). const sim = Math.cos((Math.PI * hamming) / dim); similarity = sim < 0 ? 0 : sim > 1 ? 1 : sim; } else if (usePacked && packed !== null) { const off = si * dim; let dot = 0; for (let k = 0; k < dim; k++) dot += packed[off + k] * queryEmbedding[k]; similarity = dot < 0 ? 0 : dot > 1 ? 1 : dot; } else if (shard.embedding) { similarity = this.cosineSimilarity(queryEmbedding, shard.embedding); } // Intent boost: if shard matches detected intent, boost score const intentBoost = shard.rule.intents.includes(intent) ? 0.15 : 0; // Risk boost: critical/high rules get a boost const riskBoost = shard.rule.riskClass === 'critical' ? 0.1 : shard.rule.riskClass === 'high' ? 0.05 : 0; const finalScore = similarity + intentBoost + riskBoost; const reasons = []; if (similarity > 0.3) reasons.push(`semantic match (${(similarity * 100).toFixed(0)}%)`); if (intentBoost > 0) reasons.push(`intent match (${intent})`); if (riskBoost > 0) reasons.push(`risk priority (${shard.rule.riskClass})`); results.push({ shard, similarity: finalScore, reason: reasons.join(', ') || 'general relevance', }); } // Sort by combined score descending return results.sort((a, b) => b.similarity - a.similarity); } /** * Select top N shards with contradiction checking * When two rules contradict, keep the one with higher priority */ selectWithContradictionCheck(scored, maxShards) { const selected = []; const selectedDomains = new Map(); // domain -> highest priority for (const item of scored) { if (selected.length >= maxShards) break; // Check for potential contradictions with already selected shards let dominated = false; for (const domain of item.shard.rule.domains) { const existingPriority = selectedDomains.get(domain); if (existingPriority !== undefined && existingPriority > item.shard.rule.priority) { // Higher priority rule already selected for this domain // Check if they're likely contradictory (similar domain, different intent) const existing = selected.find(s => s.shard.rule.domains.includes(domain) && s.shard.rule.priority > item.shard.rule.priority); if (existing && this.areContradictory(existing.shard.rule, item.shard.rule)) { dominated = true; break; } } } if (!dominated) { selected.push(item); for (const domain of item.shard.rule.domains) { const current = selectedDomains.get(domain) ?? 0; selectedDomains.set(domain, Math.max(current, item.shard.rule.priority)); } } } return selected; } /** * Check if two rules are contradictory */ areContradictory(a, b) { const negationPatterns = [ { positive: /\bmust\b/i, negative: /\bnever\b|\bdo not\b|\bavoid\b/i }, { positive: /\balways\b/i, negative: /\bnever\b|\bdon't\b/i }, { positive: /\brequire\b/i, negative: /\bforbid\b|\bprohibit\b/i }, ]; for (const { positive, negative } of negationPatterns) { if ((positive.test(a.text) && negative.test(b.text)) || (negative.test(a.text) && positive.test(b.text))) { return true; } } return false; } /** * Count contradictions in selected set */ countContradictions(selected) { let count = 0; for (let i = 0; i < selected.length; i++) { for (let j = i + 1; j < selected.length; j++) { if (this.areContradictory(selected[i].shard.rule, selected[j].shard.rule)) { count++; } } } return count; } /** * Build combined policy text for injection */ buildPolicyText(constitution, shards) { const parts = []; // Always include constitution parts.push(constitution.text); // Add retrieved shards if (shards.length > 0) { parts.push(''); parts.push('## Task-Specific Rules'); parts.push(''); for (const { shard, reason } of shards) { parts.push(`- ${shard.compactText}`); } } return parts.join('\n'); } /** * Simple glob matching (supports * and **). * Compiled regexes are cached per glob to avoid re-compiling on every call. */ matchGlob(path, glob) { let re = this.globCache.get(glob); if (!re) { const pattern = glob .replace(/\*\*/g, '{{GLOBSTAR}}') .replace(/\*/g, '[^/]*') .replace(/{{GLOBSTAR}}/g, '.*') .replace(/\//g, '\\/'); re = new RegExp(`^${pattern}$`); this.globCache.set(glob, re); } return re.test(path); } /** * Cosine similarity between two vectors. * * Phase 1 perf — the embeddings this retriever consumes are always * unit-normalised at production time: * - HashEmbeddingProvider divides by L2 norm before returning * (this file, line 134) * - ONNX providers (all-MiniLM-L6-v2 and friends) emit unit vectors * by design * That means `sqrt(normA) * sqrt(normB) === 1` and the only useful * computation per pair is the dot product. The old 3-accumulator * version computed dot + both norms + two sqrts + a div + a clamp — * for a result the math already guarantees lies in [-1, 1]. We drop * to pure dot + a defensive clamp. * * This compounds: every `scoreShards()` call ran `O(shards)` of these, * and `retrieveForTask()` runs it per query. */ cosineSimilarity(a, b) { if (a.length !== b.length) return 0; let dot = 0; for (let i = 0; i < a.length; i++) dot += a[i] * b[i]; // Defensive clamp — unit vectors should land in [-1, 1] but tiny // FP drift can produce 1.0000000002. Snap to [0, 1]. return dot < 0 ? 0 : dot > 1 ? 1 : dot; } /** * Get current shard count */ get shardCount() { return this.shards.length; } /** * Get constitution */ getConstitution() { return this.constitution; } } /** * Create a retriever instance */ export function createRetriever(embeddingProvider) { return new ShardRetriever(embeddingProvider); } //# sourceMappingURL=retriever.js.map