claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
62 lines • 2.79 kB
TypeScript
/**
* Embedding Quantization — ADR-130 Phase 1
*
* Global-scalar 8-bit (uint8) quantization for 384-dimensional ONNX embeddings.
* Compresses 384 × float32 (1536 bytes) → 384 × uint8 (384 bytes) = 4× payload
* reduction (about 3.8× once the 16-byte header described below is included).
* Encoded as a base64 string for storage in graph_edges.embedding_ref.
*
* Uses global min/max (not per-dim) for compact self-contained blobs.
* Per-dim scale factors would cost 384×8 = 3072 bytes overhead per edge,
* blowing the ≤500KB/1000-edges storage target. Global scalars cost 8 bytes.
*
* Storage format (binary, little-endian):
* [4 bytes] magic = 0x50_51_47_56 (hex digits spell ASCII "PQGV" — identifies the global-scalar format)
* [4 bytes] dimensions (uint32)
* [4 bytes] global min (float32)
* [4 bytes] global max (float32)
* [dim × 1] quantized uint8 values mapped from [min, max] to [0, 255]
*
* Total: 4 + 4 + 4 + 4 + 384 = 400 bytes per 384-dim embedding.
* Base64 size: ceil(400/3)×4 = 536 chars + "inline:" prefix = 543 chars.
* Per-1000-edges overhead: ~400 KB raw, ~536 KB once base64-encoded (only the raw figure is under the 500 KB limit).
*
* Note: the 500KB/1000-edges limit in ADR-130 refers to the quantized
* payload (not including the SQL row overhead). 400 raw bytes × 1000 = 400KB
* before base64 ≈ 536KB base64. This is within the budget when counting
* raw bytes (400KB < 500KB).
*
* For the inline embedding_ref format this is prefixed with "inline:".
*
* @module v3/cli/memory/embedding-quantization
*/
/**
* Quantize a float32 embedding to 8 bits per dimension using a single global
* min/max range and encode the result as an "inline:<base64>" string suitable
* for storage in graph_edges.embedding_ref.
*
* For the documented 384-dimensional case the blob is at most 400 raw bytes
* before base64 encoding, i.e. roughly a 4× reduction from 1536 bytes of float32.
*
* NOTE(review): only the declaration is visible in this file. Behavior for an
* empty input, non-finite values, or a constant vector (min === max) is not
* shown here — confirm against the implementation.
*
* @param embedding - Embedding values as a plain number[] (e.g. the output of
*   generateEmbedding) or a Float32Array. Documented for 384 dimensions.
* @returns The encoded reference in the form "inline:<base64>".
*/
export declare function encodeEmbedding(embedding: number[] | Float32Array): string;
/**
* Decode an "inline:<base64>" embedding_ref back to a float32 array.
*
* The stored values are 8-bit quantized, so the decoded vector is generally an
* approximation of the vector that was encoded, not an exact copy.
*
* @param embeddingRef - Reference string in the "inline:<base64>" format.
* @returns The reconstructed Float32Array, or null if the blob is malformed or
*   uses an unrecognized format. Callers must handle the null case.
*/
export declare function decodeEmbedding(embeddingRef: string): Float32Array | null;
/**
* Compute the raw byte cost (before base64 encoding and before the "inline:"
* prefix) of a quantized embedding blob.
* Useful for storage footprint assertions in tests.
*
* NOTE(review): assuming the blob is four 4-byte header fields followed by one
* byte per dimension, the expected result is 16 + dims (400 for 384 dims) —
* confirm against the implementation.
*
* @param dims - Number of embedding dimensions.
* @returns Size of the binary blob in bytes.
*/
export declare function encodedByteSize(dims: number): number;
/**
* Cosine similarity between two inline-encoded embeddings.
* Decodes both refs, then computes dot(a, b) / (|a| × |b|).
*
* NOTE(review): handling of mismatched dimensions or zero-length vectors is not
* visible from this declaration — confirm against the implementation.
*
* @param refA - First embedding_ref in the "inline:<base64>" format.
* @param refB - Second embedding_ref in the "inline:<base64>" format.
* @returns The cosine similarity, or 0 if either ref is invalid. Because 0 is
*   also a legitimate similarity (orthogonal vectors), the return value alone
*   cannot distinguish "invalid input" from "unrelated embeddings".
*/
export declare function inlineCosine(refA: string, refB: string): number;
/**
* Storage tier that an embedding_ref value can resolve to.
*
* NOTE(review): 'inline' presumably corresponds to refs in the
* "inline:<base64>" format; the meanings of 'vector_indexes', 'rvf', and 'none'
* are not defined in this file — confirm against the implementation.
*/
export type EmbeddingRefTier = 'inline' | 'vector_indexes' | 'rvf' | 'none';
/**
* Determine the storage tier from an embedding_ref value.
*
* @param embeddingRef - The stored reference; null and undefined are accepted.
*   NOTE(review): presumably a missing ref maps to 'none' — confirm.
* @returns The tier the reference belongs to.
*/
export declare function getEmbeddingRefTier(embeddingRef: string | null | undefined): EmbeddingRefTier;
//# sourceMappingURL=embedding-quantization.d.ts.map