claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

github.com/ruvnet/claude-flow

ruvnet/claude-flow

1,772 lines (1,569 loc) • 56.9 kB

text/typescript

/** * RuVector PostgreSQL Bridge - Vector Quantization Module * * Comprehensive vector quantization for memory reduction: * - Scalar Quantization (Int8): 4x memory reduction * - Binary Quantization: 32x memory reduction * - Product Quantization (PQ): High compression with codebooks * - Optimized Product Quantization (OPQ): PQ with learned rotation * * @module @claude-flow/plugins/integrations/ruvector/quantization * @version 1.0.0 */ // ============================================================================ // Type Definitions // ============================================================================ /** * Quantization type options. */ export type QuantizationType = 'scalar' | 'binary' | 'pq' | 'opq'; /** * Base interface for all quantizers. */ export interface IQuantizer { /** Quantization type */ readonly type: QuantizationType; /** Original vector dimensions */ readonly dimensions: number; /** Quantize a batch of vectors */ quantize(vectors: number[][]): Uint8Array[] | Int8Array[]; /** Dequantize back to float vectors (lossy) */ dequantize(quantized: Uint8Array[] | Int8Array[]): number[][]; /** Get compression ratio */ getCompressionRatio(): number; /** Get memory reduction string (e.g., "4x") */ getMemoryReduction(): string; } /** * Options for scalar quantization. */ export interface ScalarQuantizationOptions { /** Vector dimensions */ dimensions: number; /** Minimum value for calibration (auto-computed if not provided) */ minValue?: number; /** Maximum value for calibration (auto-computed if not provided) */ maxValue?: number; /** Use symmetric quantization around zero */ symmetric?: boolean; /** Number of bits for quantization (default: 8) */ bits?: number; } /** * Options for binary quantization. */ export interface BinaryQuantizationOptions { /** Vector dimensions */ dimensions: number; /** Threshold for binarization (default: 0, use sign) */ threshold?: number; /** Use learned thresholds per dimension */ learnedThresholds?: number[]; } /** * Options for product quantization. */ export interface ProductQuantizationOptions { /** Vector dimensions */ dimensions: number; /** Number of subvectors (M) - must divide dimensions evenly */ numSubvectors: number; /** Number of centroids per subvector (K) - typically 256 */ numCentroids: number; /** Maximum iterations for k-means training */ maxIterations?: number; /** Convergence tolerance */ tolerance?: number; /** Random seed for reproducibility */ seed?: number; } /** * Options for optimized product quantization. */ export interface OptimizedProductQuantizationOptions extends ProductQuantizationOptions { /** Number of OPQ iterations */ opqIterations?: number; /** Learning rate for rotation optimization */ learningRate?: number; } /** * General quantization options union type. */ export type QuantizationOptions = | ScalarQuantizationOptions | BinaryQuantizationOptions | ProductQuantizationOptions | OptimizedProductQuantizationOptions; /** * Statistics from quantization operations. */ export interface QuantizationStats { /** Compression ratio (original size / compressed size) */ compressionRatio: number; /** Memory reduction string (e.g., "4x", "32x") */ memoryReduction: string; /** Recall@10 for approximate search (0-1) */ recallAt10: number; /** Search speedup compared to exact search */ searchSpeedup: number; /** Mean squared error from quantization */ mse?: number; /** Training time in milliseconds */ trainingTimeMs?: number; } /** * Calibration data for scalar quantization. */ interface CalibrationData { minValue: number; maxValue: number; scale: number; zeroPoint: number; } /** * Codebook for product quantization. */ interface Codebook { /** Centroids [numCentroids, subvectorDim] */ centroids: number[][]; /** Assignment counts for statistics */ counts: number[]; } // ============================================================================ // Utility Functions // ============================================================================ /** * Computes the Euclidean distance between two vectors. */ function euclideanDistance(a: number[], b: number[]): number { let sum = 0; for (let i = 0; i < a.length; i++) { const diff = a[i] - b[i]; sum += diff * diff; } return Math.sqrt(sum); } /** * Computes the squared Euclidean distance. */ function squaredEuclideanDistance(a: number[], b: number[]): number { let sum = 0; for (let i = 0; i < a.length; i++) { const diff = a[i] - b[i]; sum += diff * diff; } return sum; } /** * Computes the dot product of two vectors. */ function dot(a: number[], b: number[]): number { let sum = 0; for (let i = 0; i < a.length; i++) { sum += a[i] * b[i]; } return sum; } /** * Computes the norm of a vector. */ function norm(v: number[]): number { return Math.sqrt(dot(v, v)); } /** * Normalizes a vector to unit length. */ function normalize(v: number[]): number[] { const n = norm(v); if (n < 1e-10) return v.map(() => 0); return v.map(x => x / n); } /** * Creates a zero-filled matrix. */ function zerosMatrix(rows: number, cols: number): number[][] { return Array.from({ length: rows }, () => new Array(cols).fill(0)); } /** * Creates an identity matrix. */ function identityMatrix(n: number): number[][] { const result = zerosMatrix(n, n); for (let i = 0; i < n; i++) { result[i][i] = 1; } return result; } /** * Matrix-vector multiplication. */ function matVec(matrix: number[][], vec: number[]): number[] { return matrix.map(row => dot(row, vec)); } /** * Matrix-matrix multiplication. */ function matMul(a: number[][], b: number[][]): number[][] { const rows = a.length; const cols = b[0].length; const inner = b.length; const result = zerosMatrix(rows, cols); for (let i = 0; i < rows; i++) { for (let j = 0; j < cols; j++) { let sum = 0; for (let k = 0; k < inner; k++) { sum += a[i][k] * b[k][j]; } result[i][j] = sum; } } return result; } /** * Matrix transpose. */ function transpose(matrix: number[][]): number[][] { const rows = matrix.length; const cols = matrix[0].length; const result = zerosMatrix(cols, rows); for (let i = 0; i < rows; i++) { for (let j = 0; j < cols; j++) { result[j][i] = matrix[i][j]; } } return result; } /** * Simple seeded random number generator (Mulberry32). */ function createRng(seed: number): () => number { return function() { seed = seed + 0x6d2b79f5 | 0; let t = Math.imul(seed ^ seed >>> 15, 1 | seed); t = t + Math.imul(t ^ t >>> 7, 61 | t) ^ t; return ((t ^ t >>> 14) >>> 0) / 4294967296; }; } // ============================================================================ // Scalar Quantization // ============================================================================ /** * ScalarQuantizer implements per-dimension scalar quantization. * * Quantizes float32 vectors to int8 for 4x memory reduction. * Supports symmetric and asymmetric quantization schemes. * * @example * ```typescript * const quantizer = new ScalarQuantizer({ dimensions: 128 }); * quantizer.calibrate(trainingVectors); * const quantized = quantizer.quantize(vectors); * const reconstructed = quantizer.dequantize(quantized); * ``` */ export class ScalarQuantizer implements IQuantizer { readonly type: QuantizationType = 'scalar'; readonly dimensions: number; private calibration: CalibrationData; private readonly symmetric: boolean; private readonly bits: number; private readonly qmin: number; private readonly qmax: number; private isCalibrated: boolean = false; constructor(options: ScalarQuantizationOptions) { this.dimensions = options.dimensions; this.symmetric = options.symmetric ?? false; this.bits = options.bits ?? 8; // Compute quantization range based on bits this.qmin = -(1 << (this.bits - 1)); this.qmax = (1 << (this.bits - 1)) - 1; // Initialize with default calibration this.calibration = { minValue: options.minValue ?? -1, maxValue: options.maxValue ?? 1, scale: 1, zeroPoint: 0, }; if (options.minValue !== undefined && options.maxValue !== undefined) { this.computeCalibration(options.minValue, options.maxValue); this.isCalibrated = true; } } /** * Calibrates the quantizer using sample vectors. * * @param samples - Representative vectors for calibration */ calibrate(samples: number[][]): void { if (samples.length === 0) { throw new Error('Cannot calibrate with empty samples'); } // Find min and max across all dimensions and samples let minValue = Infinity; let maxValue = -Infinity; for (const sample of samples) { for (let i = 0; i < sample.length; i++) { minValue = Math.min(minValue, sample[i]); maxValue = Math.max(maxValue, sample[i]); } } // Add small margin for numerical stability const range = maxValue - minValue; minValue -= range * 0.01; maxValue += range * 0.01; this.computeCalibration(minValue, maxValue); this.isCalibrated = true; } private computeCalibration(minValue: number, maxValue: number): void { if (this.symmetric) { // Symmetric quantization: use same scale for positive and negative const absMax = Math.max(Math.abs(minValue), Math.abs(maxValue)); this.calibration = { minValue: -absMax, maxValue: absMax, scale: (2 * absMax) / (this.qmax - this.qmin), zeroPoint: 0, }; } else { // Asymmetric quantization: full range utilization this.calibration = { minValue, maxValue, scale: (maxValue - minValue) / (this.qmax - this.qmin), zeroPoint: Math.round(this.qmin - minValue / ((maxValue - minValue) / (this.qmax - this.qmin))), }; } } /** * Quantizes float32 vectors to int8. * * @param vectors - Input vectors * @returns Quantized int8 arrays */ quantize(vectors: number[][]): Int8Array[] { if (!this.isCalibrated) { // Auto-calibrate if not done this.calibrate(vectors); } const { scale, zeroPoint } = this.calibration; return vectors.map((vec) => { const quantized = new Int8Array(vec.length); for (let i = 0; i < vec.length; i++) { const q = Math.round(vec[i] / scale) + zeroPoint; quantized[i] = Math.max(this.qmin, Math.min(this.qmax, q)); } return quantized; }); } /** * Dequantizes int8 arrays back to float32 vectors. * * @param quantized - Quantized int8 arrays * @returns Reconstructed float vectors (lossy) */ dequantize(quantized: Int8Array[]): number[][] { const { scale, zeroPoint } = this.calibration; return quantized.map((q) => { const vec = new Array(q.length); for (let i = 0; i < q.length; i++) { vec[i] = (q[i] - zeroPoint) * scale; } return vec; }); } /** * Computes approximate distance using quantized vectors. * * @param a - First quantized vector * @param b - Second quantized vector * @returns Approximate Euclidean distance */ quantizedDistance(a: Int8Array, b: Int8Array): number { const { scale } = this.calibration; let sum = 0; for (let i = 0; i < a.length; i++) { const diff = a[i] - b[i]; sum += diff * diff; } return Math.sqrt(sum) * scale; } getCompressionRatio(): number { // float32 (4 bytes) -> int8 (1 byte) = 4x return 4; } getMemoryReduction(): string { return '4x'; } /** * Gets the current calibration data. */ getCalibration(): CalibrationData { return { ...this.calibration }; } /** * Sets calibration data directly. */ setCalibration(calibration: CalibrationData): void { this.calibration = { ...calibration }; this.isCalibrated = true; } } // ============================================================================ // Binary Quantization // ============================================================================ /** * BinaryQuantizer implements binary quantization for extreme compression. * * Quantizes float32 vectors to binary (1 bit per dimension) for 32x memory reduction. * Uses Hamming distance for fast comparison. * * @example * ```typescript * const quantizer = new BinaryQuantizer({ dimensions: 128 }); * const quantized = quantizer.quantize(vectors); * const distance = quantizer.hammingDistance(quantized[0], quantized[1]); * ``` */ export class BinaryQuantizer implements IQuantizer { readonly type: QuantizationType = 'binary'; readonly dimensions: number; private threshold: number; private learnedThresholds: number[] | null; private readonly bytesPerVector: number; constructor(options: BinaryQuantizationOptions) { this.dimensions = options.dimensions; this.threshold = options.threshold ?? 0; this.learnedThresholds = options.learnedThresholds ?? null; // Calculate bytes needed (ceil(dimensions / 8)) this.bytesPerVector = Math.ceil(this.dimensions / 8); } /** * Learns optimal thresholds per dimension from training data. * * @param samples - Training vectors */ learnThresholds(samples: number[][]): void { if (samples.length === 0) { throw new Error('Cannot learn thresholds from empty samples'); } // Compute median per dimension as threshold this.learnedThresholds = new Array(this.dimensions); for (let d = 0; d < this.dimensions; d++) { const values = samples.map(s => s[d]).sort((a, b) => a - b); const mid = Math.floor(values.length / 2); this.learnedThresholds[d] = values.length % 2 === 0 ? (values[mid - 1] + values[mid]) / 2 : values[mid]; } } /** * Quantizes float32 vectors to binary. * * @param vectors - Input vectors * @returns Binary quantized arrays (packed bits) */ quantize(vectors: number[][]): Uint8Array[] { return vectors.map((vec) => { const binary = new Uint8Array(this.bytesPerVector); for (let i = 0; i < this.dimensions; i++) { const threshold = this.learnedThresholds ? this.learnedThresholds[i] : this.threshold; if (vec[i] > threshold) { const byteIdx = Math.floor(i / 8); const bitIdx = i % 8; binary[byteIdx] |= (1 << bitIdx); } } return binary; }); } /** * Dequantizes binary arrays back to float vectors. * Note: This is highly lossy and mainly for debugging. * * @param quantized - Binary quantized arrays * @returns Reconstructed vectors (-1 or +1 per dimension) */ dequantize(quantized: Uint8Array[]): number[][] { return quantized.map((binary) => { const vec = new Array(this.dimensions); for (let i = 0; i < this.dimensions; i++) { const byteIdx = Math.floor(i / 8); const bitIdx = i % 8; const bit = (binary[byteIdx] >> bitIdx) & 1; vec[i] = bit === 1 ? 1 : -1; } return vec; }); } /** * Computes Hamming distance between two binary vectors. * * @param a - First binary vector * @param b - Second binary vector * @returns Hamming distance (number of differing bits) */ hammingDistance(a: Uint8Array, b: Uint8Array): number { let distance = 0; for (let i = 0; i < a.length; i++) { const xor = a[i] ^ b[i]; // Count bits using Brian Kernighan's algorithm let bits = xor; while (bits) { distance++; bits &= bits - 1; } } return distance; } /** * Two-stage search: binary filter + rerank with exact distances. * * @param query - Query vector (float) * @param candidates - Candidate vectors (float) * @param k - Number of results to return * @param filterRatio - Ratio of candidates to keep after binary filter (default: 10) * @returns Indices of top-k candidates after reranking */ searchWithRerank( query: number[], candidates: number[][], k: number, filterRatio: number = 10 ): number[] { // Step 1: Quantize query and all candidates const queryBinary = this.quantize([query])[0]; const candidatesBinary = this.quantize(candidates); // Step 2: Compute Hamming distances const distances: Array<{ index: number; hamming: number }> = []; for (let i = 0; i < candidatesBinary.length; i++) { distances.push({ index: i, hamming: this.hammingDistance(queryBinary, candidatesBinary[i]), }); } // Step 3: Filter top candidates by Hamming distance distances.sort((a, b) => a.hamming - b.hamming); const numCandidates = Math.min(k * filterRatio, candidates.length); const filtered = distances.slice(0, numCandidates); // Step 4: Rerank filtered candidates with exact Euclidean distance const reranked: Array<{ index: number; distance: number }> = []; for (const { index } of filtered) { reranked.push({ index, distance: euclideanDistance(query, candidates[index]), }); } // Step 5: Sort by exact distance and return top-k reranked.sort((a, b) => a.distance - b.distance); return reranked.slice(0, k).map(r => r.index); } /** * Batch Hamming distance computation. * * @param query - Query binary vector * @param candidates - Candidate binary vectors * @returns Array of Hamming distances */ batchHammingDistance(query: Uint8Array, candidates: Uint8Array[]): number[] { return candidates.map(c => this.hammingDistance(query, c)); } getCompressionRatio(): number { // float32 (32 bits) -> binary (1 bit) = 32x return 32; } getMemoryReduction(): string { return '32x'; } } // ============================================================================ // Product Quantization // ============================================================================ /** * ProductQuantizer implements product quantization for high compression. * * Splits vectors into M subvectors and quantizes each to K centroids. * Memory: M * ceil(log2(K)) bits per vector (e.g., M=8, K=256 = 8 bytes) * * @example * ```typescript * const pq = new ProductQuantizer({ * dimensions: 128, * numSubvectors: 8, * numCentroids: 256 * }); * await pq.train(trainingVectors); * const codes = pq.encode(vectors); * const distances = pq.computeDistances(query, codes); * ``` */ export class ProductQuantizer implements IQuantizer { readonly type: QuantizationType = 'pq'; readonly dimensions: number; readonly numSubvectors: number; readonly numCentroids: number; readonly subvectorDim: number; protected codebooks: Codebook[] = []; protected isTrained: boolean = false; protected readonly maxIterations: number; protected readonly tolerance: number; protected readonly rng: () => number; constructor(options: ProductQuantizationOptions) { this.dimensions = options.dimensions; this.numSubvectors = options.numSubvectors; this.numCentroids = options.numCentroids; // Validate dimensions divisibility if (options.dimensions % options.numSubvectors !== 0) { throw new Error( `Dimensions (${options.dimensions}) must be divisible by numSubvectors (${options.numSubvectors})` ); } this.subvectorDim = options.dimensions / options.numSubvectors; this.maxIterations = options.maxIterations ?? 100; this.tolerance = options.tolerance ?? 1e-6; this.rng = createRng(options.seed ?? 42); } /** * Trains codebooks from training data using k-means clustering. * * @param vectors - Training vectors */ async train(vectors: number[][]): Promise<void> { if (vectors.length < this.numCentroids) { throw new Error( `Need at least ${this.numCentroids} training vectors, got ${vectors.length}` ); } this.codebooks = []; // Train a codebook for each subvector for (let m = 0; m < this.numSubvectors; m++) { // Extract subvectors const subvectors = this.extractSubvectors(vectors, m); // Train codebook using k-means const codebook = await this.trainCodebook(subvectors); this.codebooks.push(codebook); } this.isTrained = true; } /** * Extracts the m-th subvector from all vectors. */ protected extractSubvectors(vectors: number[][], m: number): number[][] { const start = m * this.subvectorDim; return vectors.map(v => v.slice(start, start + this.subvectorDim)); } /** * Trains a single codebook using k-means clustering. */ protected async trainCodebook(subvectors: number[][]): Promise<Codebook> { const k = this.numCentroids; const dim = this.subvectorDim; // Initialize centroids using k-means++ initialization const centroids = this.kmeansppInit(subvectors, k); const counts = new Array(k).fill(0); // K-means iterations for (let iter = 0; iter < this.maxIterations; iter++) { // Assignment step const assignments: number[][] = Array.from({ length: k }, () => []); for (let i = 0; i < subvectors.length; i++) { const nearestIdx = this.findNearestCentroid(subvectors[i], centroids); assignments[nearestIdx].push(i); } // Update step let maxShift = 0; for (let c = 0; c < k; c++) { if (assignments[c].length === 0) { // Reinitialize empty centroid const randomIdx = Math.floor(this.rng() * subvectors.length); centroids[c] = [...subvectors[randomIdx]]; continue; } const newCentroid = new Array(dim).fill(0); for (const idx of assignments[c]) { for (let d = 0; d < dim; d++) { newCentroid[d] += subvectors[idx][d]; } } for (let d = 0; d < dim; d++) { newCentroid[d] /= assignments[c].length; } const shift = squaredEuclideanDistance(centroids[c], newCentroid); maxShift = Math.max(maxShift, shift); centroids[c] = newCentroid; counts[c] = assignments[c].length; } // Check convergence if (maxShift < this.tolerance) { break; } // Yield to event loop periodically if (iter % 10 === 0) { await new Promise(resolve => setTimeout(resolve, 0)); } } return { centroids, counts }; } /** * K-means++ initialization for better centroid selection. */ protected kmeansppInit(subvectors: number[][], k: number): number[][] { const centroids: number[][] = []; // First centroid: random const firstIdx = Math.floor(this.rng() * subvectors.length); centroids.push([...subvectors[firstIdx]]); // Remaining centroids: proportional to squared distance for (let c = 1; c < k; c++) { const distances = subvectors.map(v => { let minDist = Infinity; for (const centroid of centroids) { const dist = squaredEuclideanDistance(v, centroid); minDist = Math.min(minDist, dist); } return minDist; }); const totalDist = distances.reduce((a, b) => a + b, 0); let threshold = this.rng() * totalDist; for (let i = 0; i < subvectors.length; i++) { threshold -= distances[i]; if (threshold <= 0) { centroids.push([...subvectors[i]]); break; } } // Fallback if we didn't select (numerical issues) if (centroids.length <= c) { const fallbackIdx = Math.floor(this.rng() * subvectors.length); centroids.push([...subvectors[fallbackIdx]]); } } return centroids; } /** * Finds the nearest centroid index for a subvector. */ protected findNearestCentroid(subvector: number[], centroids: number[][]): number { let minDist = Infinity; let minIdx = 0; for (let i = 0; i < centroids.length; i++) { const dist = squaredEuclideanDistance(subvector, centroids[i]); if (dist < minDist) { minDist = dist; minIdx = i; } } return minIdx; } /** * Encodes vectors to PQ codes. * * @param vectors - Input vectors * @returns PQ codes (one byte per subvector, assuming K=256) */ encode(vectors: number[][]): Uint8Array[] { if (!this.isTrained) { throw new Error('ProductQuantizer must be trained before encoding'); } return vectors.map((vec) => { const codes = new Uint8Array(this.numSubvectors); for (let m = 0; m < this.numSubvectors; m++) { const start = m * this.subvectorDim; const subvector = vec.slice(start, start + this.subvectorDim); codes[m] = this.findNearestCentroid(subvector, this.codebooks[m].centroids); } return codes; }); } /** * Implements IQuantizer interface - encodes vectors. */ quantize(vectors: number[][]): Uint8Array[] { return this.encode(vectors); } /** * Decodes PQ codes back to approximate vectors. * * @param codes - PQ codes * @returns Reconstructed vectors */ decode(codes: Uint8Array[]): number[][] { if (!this.isTrained) { throw new Error('ProductQuantizer must be trained before decoding'); } return codes.map((code) => { const vec = new Array(this.dimensions); for (let m = 0; m < this.numSubvectors; m++) { const centroid = this.codebooks[m].centroids[code[m]]; const start = m * this.subvectorDim; for (let d = 0; d < this.subvectorDim; d++) { vec[start + d] = centroid[d]; } } return vec; }); } /** * Implements IQuantizer interface - decodes vectors. */ dequantize(quantized: Uint8Array[]): number[][] { return this.decode(quantized); } /** * Computes asymmetric distances from a query to encoded vectors. * * Asymmetric distance computation (ADC): * - Query is NOT quantized (exact) * - Database vectors are quantized (codes) * - Distance is computed using lookup tables * * @param query - Query vector (float) * @param codes - Database PQ codes * @returns Array of distances */ computeDistances(query: number[], codes: Uint8Array[]): number[] { if (!this.isTrained) { throw new Error('ProductQuantizer must be trained before computing distances'); } // Build distance lookup tables const distanceTables = this.buildDistanceTables(query); // Compute distances using tables return codes.map((code) => { let distance = 0; for (let m = 0; m < this.numSubvectors; m++) { distance += distanceTables[m][code[m]]; } return Math.sqrt(distance); }); } /** * Builds distance lookup tables for asymmetric distance computation. */ protected buildDistanceTables(query: number[]): number[][] { const tables: number[][] = []; for (let m = 0; m < this.numSubvectors; m++) { const start = m * this.subvectorDim; const querySubvector = query.slice(start, start + this.subvectorDim); const table = new Array(this.numCentroids); for (let c = 0; c < this.numCentroids; c++) { table[c] = squaredEuclideanDistance( querySubvector, this.codebooks[m].centroids[c] ); } tables.push(table); } return tables; } /** * Computes symmetric distances between two sets of codes. * * @param codesA - First set of PQ codes * @param codesB - Second set of PQ codes * @returns Distance matrix */ computeSymmetricDistances(codesA: Uint8Array[], codesB: Uint8Array[]): number[][] { if (!this.isTrained) { throw new Error('ProductQuantizer must be trained before computing distances'); } // Precompute inter-centroid distances for each subvector const centroidDists: number[][][] = []; for (let m = 0; m < this.numSubvectors; m++) { const dists = zerosMatrix(this.numCentroids, this.numCentroids); for (let i = 0; i < this.numCentroids; i++) { for (let j = i; j < this.numCentroids; j++) { const d = squaredEuclideanDistance( this.codebooks[m].centroids[i], this.codebooks[m].centroids[j] ); dists[i][j] = d; dists[j][i] = d; } } centroidDists.push(dists); } // Compute distance matrix const result = zerosMatrix(codesA.length, codesB.length); for (let i = 0; i < codesA.length; i++) { for (let j = 0; j < codesB.length; j++) { let dist = 0; for (let m = 0; m < this.numSubvectors; m++) { dist += centroidDists[m][codesA[i][m]][codesB[j][m]]; } result[i][j] = Math.sqrt(dist); } } return result; } getCompressionRatio(): number { // float32 * dimensions -> numSubvectors bytes (for K=256) // = (4 * dimensions) / numSubvectors return (4 * this.dimensions) / this.numSubvectors; } getMemoryReduction(): string { const ratio = this.getCompressionRatio(); return `${ratio.toFixed(1)}x`; } /** * Gets the trained codebooks. */ getCodebooks(): Codebook[] { return this.codebooks.map(cb => ({ centroids: cb.centroids.map(c => [...c]), counts: [...cb.counts], })); } /** * Sets codebooks directly (for loading pretrained). */ setCodebooks(codebooks: Codebook[]): void { if (codebooks.length !== this.numSubvectors) { throw new Error(`Expected ${this.numSubvectors} codebooks, got ${codebooks.length}`); } this.codebooks = codebooks; this.isTrained = true; } /** * Checks if the quantizer is trained. */ get trained(): boolean { return this.isTrained; } } // ============================================================================ // Optimized Product Quantization (OPQ) // ============================================================================ /** * OptimizedProductQuantizer extends PQ with learned rotation. * * Learns an orthogonal rotation matrix to minimize quantization error. * The rotation decorrelates dimensions and distributes variance evenly. * * @example * ```typescript * const opq = new OptimizedProductQuantizer({ * dimensions: 128, * numSubvectors: 8, * numCentroids: 256, * opqIterations: 10 * }); * await opq.trainWithRotation(trainingVectors); * const codes = opq.encode(vectors); * ``` */ export class OptimizedProductQuantizer extends ProductQuantizer { override readonly type: QuantizationType = 'opq'; private rotationMatrix: number[][] | null = null; private readonly opqIterations: number; private readonly learningRate: number; constructor(options: OptimizedProductQuantizationOptions) { super(options); this.opqIterations = options.opqIterations ?? 10; this.learningRate = options.learningRate ?? 0.01; } /** * Trains the quantizer with rotation matrix optimization. * * @param vectors - Training vectors */ async trainWithRotation(vectors: number[][]): Promise<void> { // Initialize rotation matrix as identity this.rotationMatrix = identityMatrix(this.dimensions); for (let opqIter = 0; opqIter < this.opqIterations; opqIter++) { // Step 1: Rotate vectors const rotatedVectors = this.rotateVectors(vectors); // Step 2: Train PQ on rotated vectors await super.train(rotatedVectors); // Step 3: Update rotation matrix using Procrustes analysis this.updateRotation(vectors); // Yield to event loop await new Promise(resolve => setTimeout(resolve, 0)); } // Final PQ training with final rotation const finalRotated = this.rotateVectors(vectors); await super.train(finalRotated); } /** * Rotates vectors using the learned rotation matrix. */ private rotateVectors(vectors: number[][]): number[][] { if (!this.rotationMatrix) { return vectors; } return vectors.map(v => matVec(this.rotationMatrix!, v)); } /** * Updates the rotation matrix using Procrustes analysis. * Minimizes ||X - R * decode(encode(R^T * X))||^2 */ private updateRotation(vectors: number[][]): void { if (!this.rotationMatrix) return; // Get reconstructed vectors const rotated = this.rotateVectors(vectors); const codes = this.encode(rotated); const reconstructed = this.decode(codes); // Compute X^T * Y for Procrustes const xty = zerosMatrix(this.dimensions, this.dimensions); for (let i = 0; i < vectors.length; i++) { for (let j = 0; j < this.dimensions; j++) { for (let k = 0; k < this.dimensions; k++) { xty[j][k] += vectors[i][j] * reconstructed[i][k]; } } } // SVD approximation using power iteration // For simplicity, we use gradient descent on the rotation const gradientUpdate = this.computeRotationGradient(vectors, reconstructed); // Update rotation matrix for (let i = 0; i < this.dimensions; i++) { for (let j = 0; j < this.dimensions; j++) { this.rotationMatrix![i][j] -= this.learningRate * gradientUpdate[i][j]; } } // Orthogonalize using Gram-Schmidt this.orthogonalize(); } /** * Computes gradient for rotation update. */ private computeRotationGradient( original: number[][], reconstructed: number[][] ): number[][] { const gradient = zerosMatrix(this.dimensions, this.dimensions); for (let i = 0; i < original.length; i++) { const rotatedOrig = matVec(this.rotationMatrix!, original[i]); const error = rotatedOrig.map((v, j) => v - reconstructed[i][j]); for (let j = 0; j < this.dimensions; j++) { for (let k = 0; k < this.dimensions; k++) { gradient[j][k] += error[j] * original[i][k]; } } } // Normalize const scale = 1 / original.length; for (let i = 0; i < this.dimensions; i++) { for (let j = 0; j < this.dimensions; j++) { gradient[i][j] *= scale; } } return gradient; } /** * Orthogonalizes the rotation matrix using modified Gram-Schmidt. */ private orthogonalize(): void { if (!this.rotationMatrix) return; for (let i = 0; i < this.dimensions; i++) { // Normalize column i let n = 0; for (let j = 0; j < this.dimensions; j++) { n += this.rotationMatrix[j][i] * this.rotationMatrix[j][i]; } n = Math.sqrt(n); if (n > 1e-10) { for (let j = 0; j < this.dimensions; j++) { this.rotationMatrix[j][i] /= n; } } // Remove component from remaining columns for (let k = i + 1; k < this.dimensions; k++) { let projection = 0; for (let j = 0; j < this.dimensions; j++) { projection += this.rotationMatrix[j][i] * this.rotationMatrix[j][k]; } for (let j = 0; j < this.dimensions; j++) { this.rotationMatrix[j][k] -= projection * this.rotationMatrix[j][i]; } } } } /** * Encodes vectors with rotation. */ override encode(vectors: number[][]): Uint8Array[] { const rotated = this.rotateVectors(vectors); return super.encode(rotated); } /** * Decodes codes and applies inverse rotation. */ override decode(codes: Uint8Array[]): number[][] { const decoded = super.decode(codes); if (!this.rotationMatrix) { return decoded; } // Apply inverse rotation (transpose for orthogonal matrix) const invRotation = transpose(this.rotationMatrix); return decoded.map(v => matVec(invRotation, v)); } /** * Computes distances with rotation applied to query. */ override computeDistances(query: number[], codes: Uint8Array[]): number[] { const rotatedQuery = this.rotationMatrix ? matVec(this.rotationMatrix, query) : query; return super.computeDistances(rotatedQuery, codes); } /** * Gets the rotation matrix. */ getRotationMatrix(): number[][] | null { return this.rotationMatrix ? this.rotationMatrix.map(r => [...r]) : null; } /** * Sets the rotation matrix directly. */ setRotationMatrix(matrix: number[][]): void { if (matrix.length !== this.dimensions || matrix[0].length !== this.dimensions) { throw new Error(`Expected ${this.dimensions}x${this.dimensions} matrix`); } this.rotationMatrix = matrix.map(r => [...r]); } } // ============================================================================ // SQL Integration // ============================================================================ /** * QuantizationSQL generates SQL for quantized vector operations. * * Provides SQL statements for: * - Creating quantized storage tables * - Inserting quantized vectors * - Searching with quantized distances */ export class QuantizationSQL { /** * Generates SQL for creating a table with quantized vector storage. * * @param tableName - Table name * @param type - Quantization type * @param options - Quantization options * @returns CREATE TABLE SQL statement */ static createQuantizedTable( tableName: string, type: QuantizationType, options?: { dimensions?: number; numSubvectors?: number; idType?: 'SERIAL' | 'BIGSERIAL' | 'UUID'; additionalColumns?: string; } ): string { const { dimensions = 128, numSubvectors = 8, idType = 'BIGSERIAL', additionalColumns = '', } = options ?? {}; let vectorColumn: string; let comment: string; switch (type) { case 'scalar': vectorColumn = `quantized_vector BYTEA NOT NULL`; comment = `Scalar quantized vectors (int8, ${dimensions} dims, 4x compression)`; break; case 'binary': const binaryBytes = Math.ceil(dimensions / 8); vectorColumn = `binary_vector BIT(${dimensions})`; comment = `Binary quantized vectors (${dimensions} dims, ${binaryBytes} bytes, 32x compression)`; break; case 'pq': case 'opq': vectorColumn = `pq_codes BYTEA NOT NULL`; comment = `${type === 'opq' ? 'Optimized ' : ''}Product quantized vectors (M=${numSubvectors}, K=256)`; break; default: throw new Error(`Unknown quantization type: ${type}`); } const extraCols = additionalColumns ? `\n ${additionalColumns},` : ''; return ` -- Table for ${comment} CREATE TABLE IF NOT EXISTS ${tableName} ( id ${idType} PRIMARY KEY,${extraCols} original_vector vector(${dimensions}), -- Optional: keep original for reranking ${vectorColumn}, metadata JSONB, created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP ); -- Index for quantized search CREATE INDEX IF NOT EXISTS idx_${tableName}_quantized ON ${tableName} (quantized_vector); COMMENT ON TABLE ${tableName} IS '${comment}'; `.trim(); } /** * Generates SQL for inserting a quantized vector. * * @param tableName - Table name * @param type - Quantization type * @returns INSERT SQL template with placeholders */ static insertQuantizedSQL(tableName: string, type: QuantizationType): string { const column = type === 'binary' ? 'binary_vector' : (type === 'pq' || type === 'opq') ? 'pq_codes' : 'quantized_vector'; return ` INSERT INTO ${tableName} (original_vector, ${column}, metadata) VALUES ($1::vector, $2, $3::jsonb) RETURNING id; `.trim(); } /** * Generates SQL for batch insert of quantized vectors. * * @param tableName - Table name * @param type - Quantization type * @param count - Number of vectors * @returns Batch INSERT SQL */ static batchInsertSQL( tableName: string, type: QuantizationType, count: number ): string { const column = type === 'binary' ? 'binary_vector' : (type === 'pq' || type === 'opq') ? 'pq_codes' : 'quantized_vector'; const values = Array.from({ length: count }, (_, i) => { const offset = i * 3; return `($${offset + 1}::vector, $${offset + 2}, $${offset + 3}::jsonb)`; }).join(',\n '); return ` INSERT INTO ${tableName} (original_vector, ${column}, metadata) VALUES ${values} RETURNING id; `.trim(); } /** * Generates SQL for scalar quantized search. * * @param tableName - Table name * @param k - Number of results * @param useReranking - Whether to rerank with original vectors * @returns Search SQL template */ static scalarSearchSQL( tableName: string, k: number, useReranking: boolean = true ): string { if (useReranking) { // Two-stage search: filter with quantized, rerank with original const filterK = k * 10; return ` WITH candidates AS ( SELECT id, original_vector, metadata, ruvector_scalar_distance(quantized_vector, $1::bytea) AS approx_dist FROM ${tableName} ORDER BY approx_dist ASC LIMIT ${filterK} ) SELECT id, metadata, original_vector <-> $2::vector AS exact_dist FROM candidates ORDER BY exact_dist ASC LIMIT ${k}; `.trim(); } return ` SELECT id, metadata, ruvector_scalar_distance(quantized_vector, $1::bytea) AS distance FROM ${tableName} ORDER BY distance ASC LIMIT ${k}; `.trim(); } /** * Generates SQL for binary quantized search with Hamming distance. * * @param tableName - Table name * @param k - Number of results * @param useReranking - Whether to rerank with original vectors * @returns Search SQL template */ static binarySearchSQL( tableName: string, k: number, useReranking: boolean = true ): string { if (useReranking) { const filterK = k * 10; return ` WITH candidates AS ( SELECT id, original_vector, metadata, bit_count(binary_vector # $1::bit) AS hamming_dist FROM ${tableName} ORDER BY hamming_dist ASC LIMIT ${filterK} ) SELECT id, metadata, original_vector <-> $2::vector AS exact_dist FROM candidates ORDER BY exact_dist ASC LIMIT ${k}; `.trim(); } return ` SELECT id, metadata, bit_count(binary_vector # $1::bit) AS hamming_distance FROM ${tableName} ORDER BY hamming_distance ASC LIMIT ${k}; `.trim(); } /** * Generates SQL for PQ search using distance lookup tables. * * @param tableName - Table name * @param k - Number of results * @param numSubvectors - Number of PQ subvectors * @param useReranking - Whether to rerank * @returns Search SQL template */ static pqSearchSQL( tableName: string, k: number, numSubvectors: number = 8, useReranking: boolean = true ): string { // Generate SQL for lookup table based distance computation const distanceTerms = Array.from( { length: numSubvectors }, (_, m) => `ruvector_pq_subvector_dist($1, ${m}, get_byte(pq_codes, ${m}))` ).join(' + '); if (useReranking) { const filterK = k * 10; return ` WITH candidates AS ( SELECT id, original_vector, metadata, sqrt(${distanceTerms}) AS approx_dist FROM ${tableName} ORDER BY approx_dist ASC LIMIT ${filterK} ) SELECT id, metadata, original_vector <-> $2::vector AS exact_dist FROM candidates ORDER BY exact_dist ASC LIMIT ${k}; `.trim(); } return ` SELECT id, metadata, sqrt(${distanceTerms}) AS distance FROM ${tableName} ORDER BY distance ASC LIMIT ${k}; `.trim(); } /** * Generates SQL for creating PQ lookup tables. * * @param tableName - Lookup table name * @param numSubvectors - Number of subvectors (M) * @param numCentroids - Number of centroids (K) * @returns CREATE TABLE SQL for lookup tables */ static createPQLookupTables( tableName: string, numSubvectors: number = 8, numCentroids: number = 256 ): string { return ` -- PQ codebooks storage CREATE TABLE IF NOT EXISTS ${tableName}_codebooks ( subvector_id INTEGER NOT NULL, centroid_id INTEGER NOT NULL, centroid vector NOT NULL, PRIMARY KEY (subvector_id, centroid_id) ); -- Precomputed distance lookup (for specific queries) CREATE TABLE IF NOT EXISTS ${tableName}_distance_lookup ( query_id BIGINT NOT NULL, subvector_id INTEGER NOT NULL, centroid_id INTEGER NOT NULL, squared_distance REAL NOT NULL, PRIMARY KEY (query_id, subvector_id, centroid_id) ); CREATE INDEX IF NOT EXISTS idx_${tableName}_lookup_query ON ${tableName}_distance_lookup (query_id, subvector_id); COMMENT ON TABLE ${tableName}_codebooks IS 'PQ codebooks: M=${numSubvectors}, K=${numCentroids}'; `.trim(); } /** * Generates SQL for inserting PQ codebooks. * * @param tableName - Base table name * @param codebooks - Trained codebooks * @returns INSERT SQL for codebooks */ static insertCodebooksSQL( tableName: string, codebooks: Array<{ centroids: number[][] }> ): string { const values: string[] = []; for (let m = 0; m < codebooks.length; m++) { for (let k = 0; k < codebooks[m].centroids.length; k++) { const centroidStr = `'[${codebooks[m].centroids[k].join(',')}]'`; values.push(`(${m}, ${k}, ${centroidStr}::vector)`); } } return ` INSERT INTO ${tableName}_codebooks (subvector_id, centroid_id, centroid) VALUES ${values.join(',\n ')} ON CONFLICT (subvector_id, centroid_id) DO UPDATE SET centroid = EXCLUDED.centroid; `.trim(); } /** * Generates SQL function for computing PQ distance. * * @param functionName - Function name * @param numSubvectors - Number of subvectors * @returns CREATE FUNCTION SQL */ static createPQDistanceFunction( functionName: string = 'pq_asymmetric_distance', numSubvectors: number = 8 ): string { return ` CREATE OR REPLACE FUNCTION ${functionName}( query_vector vector, pq_codes bytea, codebook_table text ) RETURNS real AS $$ DECLARE total_distance real := 0; m integer; code integer; subvector_dim integer; query_subvector vector; centroid vector; BEGIN subvector_dim := vector_dims(query_vector) / ${numSubvectors}; FOR m IN 0..${numSubvectors - 1} LOOP code := get_byte(pq_codes, m); -- Extract query subvector query_subvector := vector_slice(query_vector, m * subvector_dim, (m + 1) * subvector_dim); -- Get centroid from codebook EXECUTE format('SELECT centroid FROM %I WHERE subvector_id = $1 AND centroid_id = $2', codebook_table || '_codebooks') INTO centroid USING m, code; -- Add squared distance total_distance := total_distance + (query_subvector <-> centroid)^2; END LOOP; RETURN sqrt(total_distance); END; $$ LANGUAGE plpgsql IMMUTABLE; `.trim(); } /** * Generates SQL for OPQ with rotation. * * @param tableName - Table name * @param dimensions - Vector dimensions * @returns SQL for rotation matrix storage */ static createOPQRotationTable(tableName: string, dimensions: number): string { return ` -- OPQ rotation matrix storage CREATE TABLE IF NOT EXISTS ${tableName}_rotation ( row_id INTEGER NOT NULL, col_id INTEGER NOT NULL, value REAL NOT NULL, PRIMARY KEY (row_id, col_id) ); -- Function to apply rotation CREATE OR REPLACE FUNCTION ${tableName}_rotate_vector(v vector) RETURNS vector AS $$ DECLARE result float8[]; i integer; sum float8; j integer; BEGIN result := array_fill(0::float8, ARRAY[${dimensions}]); FOR i IN 0..${dimensions - 1} LOOP sum := 0; FOR j IN 0..${dimensions - 1} LOOP SELECT sum + r.value * v[j+1] INTO sum FROM ${tableName}_rotation r WHERE r.row_id = i AND r.col_id = j; END LOOP; result[i+1] := sum; END LOOP; RETURN result::vector; END; $$ LANGUAGE plpgsql IMMUTABLE; COMMENT ON TABLE ${tableName}_rotation IS 'OPQ rotation matrix (${dimensions}x${dimensions})'; `.trim(); } /** * Generates SQL for quantization statistics view. * * @param tableName - Base table name * @returns CREATE VIEW SQL */ static createStatsView(tableName: string): string { return ` CREATE OR REPLACE VIEW ${tableName}_quantization_stats AS SELECT pg_total_relation_size('${tableName}'::regclass) AS total_size_bytes, pg_relation_size('${tableName}'::regclass) AS table_size_bytes, pg_indexes_size('${tableName}'::regclass) AS index_size_bytes, (SELECT count(*) FROM ${tableName}) AS row_count, CASE WHEN (SELECT count(*) FROM ${tableName}) > 0 THEN pg_relation_size('${tableName}'::regclass)::float / (SELECT count(*) FROM ${tableName}) ELSE 0 END AS avg_bytes_per_row; `.trim(); } } // ============================================================================ // Factory and Utilities // ============================================================================ /** * Creates a quantizer based on the specified type. * * @param type - Quantization type * @param options - Type-specific options * @returns Configured quantizer instance * * @example * ```typescript * const scalar = createQuantizer('scalar', { dimensions: 128 }); * const binary = createQuantizer('binary', { dimensions: 128 }); * const pq = createQuantizer('pq', { dimensions: 128, numSubvectors: 8, numCentroids: 256 }); * ``` */ export function createQuantizer( type: 'scalar', options: ScalarQuantizationOptions ): ScalarQuantizer; export function createQuantizer( type: 'binary', options: BinaryQuantizationOptions ): BinaryQuantizer; export function createQuantizer( type: 'pq', options: ProductQuantizationOptions ): ProductQuantizer; export function createQuantizer( type: 'opq', options: OptimizedProductQuantizationOptions ): OptimizedProductQuantizer; export function createQuantizer( type: QuantizationType, options?: QuantizationOptions ): IQuantizer; export function createQuantizer( type: QuantizationType, options?: QuantizationOptions ): IQuantizer { switch (type) { case 'sca