claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
172 lines • 6.34 kB
TypeScript
/**
* Task Intent Classifier + Shard Retriever
*
* Stores rule shards in vector storage with embeddings and metadata.
* At task start, retrieves the top N shards by semantic similarity
* with hard filters by risk class and repo scope.
*
* Retrieval contract:
* 1. Always include the constitution
* 2. Retrieve up to 5 shards by semantic similarity
* 3. Add hard filters by risk class and repo scope
* 4. Contradiction check: when two rules contradict, keep the higher-priority rule
*
* @module @claude-flow/guidance/retriever
*/
import type { PolicyBundle, Constitution, TaskIntent, RetrievalRequest, RetrievalResult } from './types.js';
/**
* Source of text embeddings used by the retriever.
*
* An implementation can be passed to the {@link ShardRetriever} constructor
* or to {@link createRetriever}. {@link HashEmbeddingProvider} is the
* test-only implementation declared in this file.
*/
export interface IEmbeddingProvider {
/**
* Embed a single text.
*
* @param text - Text to embed.
* @returns Promise resolving to the embedding vector for `text`.
*/
embed(text: string): Promise<Float32Array>;
/**
* Embed several texts in one call.
*
* @param texts - Texts to embed.
* @returns Promise resolving to an array of embedding vectors.
* NOTE(review): presumably one vector per input, in input order —
* confirm against the implementations.
*/
batchEmbed(texts: string[]): Promise<Float32Array[]>;
}
/**
* Deterministic hash-based embedding provider — **test-only**.
*
* Produces fixed-dimension vectors from a simple character-hash → sin()
* transform. The resulting embeddings have no real semantic meaning;
* they are stable and fast, which makes them useful for unit/integration
* tests that need a concrete {@link IEmbeddingProvider} without loading
* an ONNX model.
*
* According to the `cosineSimilarity` notes on {@link ShardRetriever},
* the vectors are divided by their L2 norm before being returned, i.e.
* they are unit-length.
*
* **Do NOT use in production** — replace with a real model-backed
* provider (e.g. the agentic-flow ONNX integration).
*/
export declare class HashEmbeddingProvider implements IEmbeddingProvider {
/**
* NOTE(review): presumably the length of every produced vector, as set
* through the constructor; the type is erased in this declaration.
*/
private dimensions;
/**
* NOTE(review): presumably memoises embeddings per input text — confirm
* in the implementation; the type is erased in this declaration.
*/
private cache;
/**
* @param dimensions - Optional vector dimensionality. The default used
* when omitted is defined in the implementation and is not visible in
* this declaration.
*/
constructor(dimensions?: number);
/**
* Embed a single text.
*
* @param text - Text to embed.
* @returns Promise resolving to the embedding vector for `text`.
*/
embed(text: string): Promise<Float32Array>;
/**
* Embed several texts in one call.
*
* @param texts - Texts to embed.
* @returns Promise resolving to an array of embedding vectors.
*/
batchEmbed(texts: string[]): Promise<Float32Array[]>;
/**
* NOTE(review): presumably the character-hash → sin() transform described
* in the class comment; the signature is erased in this declaration.
*/
private hashEmbed;
}
/**
* Retrieves the policy rule shards relevant to a task.
*
* As documented on the individual members: a compiled policy bundle is
* loaded with `loadBundle`, shard embeddings are generated by
* `indexShards`, and `retrieve` returns the constitution plus up to
* `maxShards` shards chosen by semantic similarity, subject to hard
* filters (risk class, repo scope) and a contradiction check.
*
* Private members appear here without types because this is a declaration
* file; the notes on them are hedged where the declaration alone cannot
* confirm their role.
*/
export declare class ShardRetriever {
/** NOTE(review): presumably the rule shards available for retrieval — confirm. */
private shards;
/** NOTE(review): presumably the value returned by `getConstitution()` — confirm. */
private constitution;
/** NOTE(review): presumably the provider passed to the constructor — confirm. */
private embeddingProvider;
/** NOTE(review): presumably records whether `indexShards()` has run — confirm. */
private indexed;
/** Cache of compiled regexes, one per glob, used by `matchGlob`. */
private globCache;
/**
* Single contiguous Float32Array holding every shard embedding; built by
* `indexShards` and read by `scoreShards`.
*/
private packedEmbeddings;
/**
* NOTE(review): presumably the embedding dimension of `packedEmbeddings`;
* `scoreShards` only takes the packed path when the dim matches.
*/
private packedDim;
/**
* NOTE(review): presumably the number of shards packed at index time,
* used to tell whether `packedEmbeddings` is still current — confirm.
*/
private packedShardCount;
/**
* NOTE(review): presumably the 1-bit sign signatures of the shard
* embeddings, in the packed format that `buildQuerySignature` matches.
*/
private packedSignatures;
/**
* NOTE(review): presumably the number of 32-bit words per signature
* (see `popcount32`) — confirm.
*/
private wordsPerSig;
/**
* @param embeddingProvider - Optional embedding provider. What is used
* when it is omitted is defined in the implementation and is not visible
* in this declaration.
*/
constructor(embeddingProvider?: IEmbeddingProvider);
/**
* Load a compiled policy bundle.
*
* @param bundle - Compiled policy bundle to load.
* @returns Promise that settles when loading has finished.
*/
loadBundle(bundle: PolicyBundle): Promise<void>;
/**
* Index all shards by generating embeddings.
*
* M3 substrate — also packs every shard embedding into a single
* contiguous Float32Array (`packedEmbeddings`) so scoreShards can run
* the cosine as a vectorized matrix-vector dot in cache-friendly
* sequential memory rather than chasing per-shard heap pointers.
* Costs O(n × dim) at index time (one-shot) for an O(n) scan win
* on every query.
*
* @returns Promise that settles when indexing has finished.
*/
indexShards(): Promise<void>;
/**
* Build a 1-bit sign signature for the query vector. Matches the
* packed-shard format produced in indexShards above.
*/
private buildQuerySignature;
/**
* Hamming-weight popcount on a single 32-bit word (Wegner / Wilkes).
* Tested at ~1 ns on V8 — no native popcnt instruction exposed.
*/
private static popcount32;
/**
* Classify task intent.
*
* @param taskDescription - Free-text description of the task.
* @returns The detected intent together with a confidence score; the
* score's range is not specified in this declaration.
*/
classifyIntent(taskDescription: string): {
intent: TaskIntent;
confidence: number;
};
/**
* Retrieve relevant shards for a task.
*
* Contract:
* 1. Always include the constitution
* 2. Up to maxShards by semantic similarity
* 3. Hard filters by risk class and repo scope
* 4. Contradiction check: prefer higher priority
*
* @param request - Retrieval request describing the task.
* @returns Promise resolving to the retrieval result.
*/
retrieve(request: RetrievalRequest): Promise<RetrievalResult>;
/**
* Score all shards against the query.
*
* M3 perf substrate — three changes from the baseline:
*
* 1. Filter FIRST, cosine SECOND. The old code computed cosine for
* every shard regardless of whether riskFilter/repoScope would
* throw it away. We now decide eligibility first and only do
* the 384-dim multiply for survivors.
*
* 2. Packed-matrix cosine — when `packedEmbeddings` is current and
* dim matches, compute the dot directly from contiguous memory
* (one allocation, sequential reads) instead of dereferencing
* `shard.embedding` per call. Embeddings are always unit-
* normalised so cosine === dot + clamp.
*
* 3. Top-K partial selection — when the caller only wants `maxShards`
* results (typical), don't `.sort()` the entire candidate list.
* Maintain a fixed-size heap of size K and only compare/swap
* against its current minimum. Drops the final step from
* O(n log n) to O(n log K).
*/
private scoreShards;
/**
* Select top N shards with contradiction checking.
* When two rules contradict, keep the one with higher priority.
*/
private selectWithContradictionCheck;
/**
* Check if two rules are contradictory.
*/
private areContradictory;
/**
* Count contradictions in selected set.
*/
private countContradictions;
/**
* Build combined policy text for injection.
*/
private buildPolicyText;
/**
* Simple glob matching (supports * and **).
* Compiled regexes are cached per glob to avoid re-compiling on every call.
*/
private matchGlob;
/**
* Cosine similarity between two vectors.
*
* Phase 1 perf — the embeddings this retriever consumes are always
* unit-normalised at production time:
* - HashEmbeddingProvider divides by L2 norm before returning
* (in the implementation source; the original "line 134" reference
* pointed there, not into this declaration file)
* - ONNX providers (all-MiniLM-L6-v2 and friends) emit unit vectors
* by design
* That means `sqrt(normA) * sqrt(normB) === 1` and the only useful
* computation per pair is the dot product. The old 3-accumulator
* version computed dot + both norms + two sqrts + a div + a clamp —
* for a result the math already guarantees lies in [-1, 1]. We drop
* to pure dot + a defensive clamp.
*
* This compounds: every `scoreShards()` call ran `O(shards)` of these,
* and `retrieveForTask()` runs it per query.
*
* NOTE(review): `retrieveForTask()` is not declared on this class; it may
* be a caller defined elsewhere or a stale name for `retrieve()` — confirm.
*/
private cosineSimilarity;
/**
* Get current shard count.
*
* @returns The current number of shards.
*/
get shardCount(): number;
/**
* Get constitution.
*
* @returns The constitution, or `null` when none is available.
* NOTE(review): presumably `null` until a bundle has been loaded — confirm.
*/
getConstitution(): Constitution | null;
}
/**
* Create a retriever instance.
*
* @param embeddingProvider - Optional embedding provider for the new
* retriever. What is used when it is omitted is defined in the
* implementation and is not visible in this declaration.
* @returns A {@link ShardRetriever}.
*/
export declare function createRetriever(embeddingProvider?: IEmbeddingProvider): ShardRetriever;
//# sourceMappingURL=retriever.d.ts.map