UNPKG

claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

681 lines (585 loc) 22.9 kB
/**
 * RuVector PostgreSQL Bridge - Quantization Example
 *
 * This example demonstrates:
 * - Comparing different quantization methods
 * - Measuring recall vs compression trade-offs
 * - Production configuration recommendations
 * - Memory optimization strategies
 *
 * Run with: npx ts-node examples/ruvector/quantization.ts
 *
 * @module @claude-flow/plugins/examples/ruvector/quantization
 */

import {
  createRuVectorBridge,
  type RuVectorBridge,
  type VectorRecord,
} from '../../src/integrations/ruvector/index.js';

// ============================================================================
// Configuration
// ============================================================================

const config = {
  connection: {
    // `||` (not `??`) is deliberate: an empty environment variable falls back to the default.
    host: process.env.POSTGRES_HOST || 'localhost',
    port: parseInt(process.env.POSTGRES_PORT || '5432', 10),
    database: process.env.POSTGRES_DB || 'vectors',
    user: process.env.POSTGRES_USER || 'postgres',
    password: process.env.POSTGRES_PASSWORD || 'postgres',
  },
  dimensions: 768, // Typical embedding dimension
  testVectors: 10000, // Number of test vectors
  queryVectors: 100, // Number of query vectors
  k: 10, // Top-k for recall calculation
};

// ============================================================================
// Quantization Types
// ============================================================================

type QuantizationMethod = 'none' | 'int8' | 'int4' | 'binary' | 'pq';

interface QuantizationConfig {
  method: QuantizationMethod;
  name: string;
  bitsPerComponent: number;
  description: string;
}

/**
 * A searchable, in-memory set of vectors stored under one quantization method.
 */
interface QuantizedIndex {
  /** Returns the indices of the `k` best matches for `query`, best first. */
  search: (query: number[], k: number) => number[];
  /** Storage cost of one encoded vector, in bytes, as reported by the demo. */
  bytesPerVector: number;
}

const quantizationMethods: QuantizationConfig[] = [
  {
    method: 'none',
    name: 'Float32 (No Quantization)',
    bitsPerComponent: 32,
    description: 'Full precision floating point',
  },
  {
    method: 'int8',
    name: 'Int8 Scalar Quantization',
    bitsPerComponent: 8,
    description: '4x compression, ~99% recall',
  },
  {
    method: 'int4',
    name: 'Int4 Scalar Quantization',
    bitsPerComponent: 4,
    description: '8x compression, ~95% recall',
  },
  {
    method: 'binary',
    name: 'Binary Quantization',
    bitsPerComponent: 1,
    description: '32x compression, ~85% recall',
  },
  {
    method: 'pq',
    name: 'Product Quantization (PQ)',
    bitsPerComponent: 8, // per subvector
    description: 'Adaptive compression, good for high dimensions',
  },
];

// ============================================================================
// Quantization Implementation
// ============================================================================

/**
 * Find the smallest and largest component of a vector.
 *
 * Implemented as a loop rather than `Math.min(...vector)` so that the vector
 * length is not limited by the engine's maximum argument count.
 *
 * @returns `{ min: 0, max: 0 }` for an empty vector.
 */
function vectorRange(vector: number[]): { min: number; max: number } {
  if (vector.length === 0) {
    return { min: 0, max: 0 };
  }
  let min = vector[0];
  let max = vector[0];
  for (let i = 1; i < vector.length; i++) {
    const value = vector[i];
    if (value < min) {
      min = value;
    } else if (value > max) {
      max = value;
    }
  }
  return { min, max };
}

/**
 * Scalar quantization to Int8.
 *
 * Maps the vector's [min, max] range linearly onto the 256 codes -128..127.
 *
 * @param vector - Float components to quantize.
 * @returns The codes plus the `scale` and `offset` needed by `dequantizeInt8`.
 */
function quantizeInt8(vector: number[]): { quantized: Int8Array; scale: number; offset: number } {
  const { min, max } = vectorRange(vector);
  const scale = (max - min) / 255;
  const offset = min;
  const quantized = new Int8Array(vector.length);

  if (scale > 0) {
    for (let i = 0; i < vector.length; i++) {
      quantized[i] = Math.round((vector[i] - offset) / scale) - 128;
    }
  } else {
    // Constant (or empty) vector: every component equals `offset`, so use the
    // lowest code instead of dividing by zero. Dequantization yields `offset`.
    quantized.fill(-128);
  }

  return { quantized, scale, offset };
}

/**
 * Dequantize Int8 back to float.
 *
 * @param data - Output of `quantizeInt8`.
 * @returns The reconstructed (lossy) float components.
 */
function dequantizeInt8(data: { quantized: Int8Array; scale: number; offset: number }): number[] {
  const result = new Array<number>(data.quantized.length);
  for (let i = 0; i < data.quantized.length; i++) {
    result[i] = (data.quantized[i] + 128) * data.scale + data.offset;
  }
  return result;
}

/**
 * Scalar quantization to Int4 (packed).
 *
 * Maps the vector's [min, max] range onto the 16 codes 0..15 and packs two
 * codes per byte: even indices in the high nibble, odd indices in the low one.
 *
 * @param vector - Float components to quantize.
 * @returns The packed codes plus the `scale` and `offset` needed by `dequantizeInt4`.
 */
function quantizeInt4(vector: number[]): { quantized: Uint8Array; scale: number; offset: number } {
  const { min, max } = vectorRange(vector);
  const scale = (max - min) / 15;
  const offset = min;

  // Pack two Int4 values into one byte
  const packedLength = Math.ceil(vector.length / 2);
  const quantized = new Uint8Array(packedLength);

  // For a constant (or empty) vector scale is 0; all codes stay 0, which
  // dequantizes to `offset`, without ever dividing by zero.
  if (scale > 0) {
    for (let i = 0; i < vector.length; i += 2) {
      const high = Math.round((vector[i] - offset) / scale) & 0x0f;
      const low = i + 1 < vector.length ? Math.round((vector[i + 1] - offset) / scale) & 0x0f : 0;
      quantized[i >> 1] = (high << 4) | low;
    }
  }

  return { quantized, scale, offset };
}

/**
 * Dequantize Int4 back to float.
 *
 * @param data - Output of `quantizeInt4`.
 * @param originalLength - Number of components in the vector that was
 *   quantized; needed because an odd-length vector leaves a padding nibble.
 * @returns The reconstructed (lossy) float components.
 */
function dequantizeInt4(
  data: { quantized: Uint8Array; scale: number; offset: number },
  originalLength: number
): number[] {
  const result = new Array<number>(originalLength);
  for (let i = 0; i < originalLength; i += 2) {
    const packed = data.quantized[i >> 1];
    result[i] = ((packed >> 4) & 0x0f) * data.scale + data.offset;
    if (i + 1 < originalLength) {
      result[i + 1] = (packed & 0x0f) * data.scale + data.offset;
    }
  }
  return result;
}

/**
 * Binary quantization (sign bit only).
 *
 * Each component becomes one bit (1 when the component is >= 0), packed
 * most-significant-bit first, eight components per byte.
 */
function quantizeBinary(vector: number[]): Uint8Array {
  const packedLength = Math.ceil(vector.length / 8);
  const quantized = new Uint8Array(packedLength);

  for (let i = 0; i < vector.length; i++) {
    if (vector[i] >= 0) {
      quantized[i >> 3] |= 1 << (7 - (i % 8));
    }
  }

  return quantized;
}

/**
 * Compute Hamming distance for binary vectors.
 *
 * Iterates over the bytes of `a`; both arguments are expected to have the
 * same length.
 *
 * @returns The number of bit positions in which `a` and `b` differ.
 */
function hammingDistance(a: Uint8Array, b: Uint8Array): number {
  let distance = 0;
  for (let i = 0; i < a.length; i++) {
    // Count differing bits
    let xor = a[i] ^ b[i];
    while (xor) {
      distance += xor & 1;
      xor >>= 1;
    }
  }
  return distance;
}

/**
 * Product Quantization (simplified).
 *
 * Splits a vector into `numSubvectors` chunks and replaces each chunk with the
 * index of its nearest centroid in a per-chunk codebook of 256 centroids.
 * The codebooks here are random; in production they are trained on data.
 */
class ProductQuantizer {
  private readonly dimension: number;
  private readonly numSubvectors: number;
  private readonly subvectorDim: number;
  private readonly codebooks: number[][][]; // [subvector][centroid][dimension]
  private readonly numCentroids: number = 256;

  /**
   * @param dimension - Number of components in the vectors to encode.
   * @param numSubvectors - Number of chunks (and therefore code bytes) per vector.
   * @throws RangeError if either argument is not a positive integer.
   */
  constructor(dimension: number, numSubvectors: number = 8) {
    if (!Number.isInteger(dimension) || dimension <= 0) {
      throw new RangeError(`dimension must be a positive integer, got ${dimension}`);
    }
    if (!Number.isInteger(numSubvectors) || numSubvectors <= 0) {
      throw new RangeError(`numSubvectors must be a positive integer, got ${numSubvectors}`);
    }

    this.dimension = dimension;
    this.numSubvectors = numSubvectors;
    this.subvectorDim = Math.ceil(dimension / numSubvectors);
    this.codebooks = [];

    // Initialize random codebooks (in production, train on data)
    for (let m = 0; m < numSubvectors; m++) {
      const codebook: number[][] = [];
      for (let c = 0; c < this.numCentroids; c++) {
        const centroid = Array.from({ length: this.subvectorDim }, () => Math.random() * 2 - 1);
        codebook.push(centroid);
      }
      this.codebooks.push(codebook);
    }
  }

  /**
   * Encode a vector as one centroid index per subvector.
   *
   * @returns `numSubvectors` bytes, each a centroid index in 0..255.
   */
  encode(vector: number[]): Uint8Array {
    const codes = new Uint8Array(this.numSubvectors);

    for (let m = 0; m < this.numSubvectors; m++) {
      const start = m * this.subvectorDim;
      const end = Math.min(start + this.subvectorDim, vector.length);
      const subvector = vector.slice(start, end);

      // Pad if necessary
      while (subvector.length < this.subvectorDim) {
        subvector.push(0);
      }

      // Find nearest centroid
      let minDist = Infinity;
      let minIdx = 0;
      for (let c = 0; c < this.numCentroids; c++) {
        const dist = this.euclideanDistance(subvector, this.codebooks[m][c]);
        if (dist < minDist) {
          minDist = dist;
          minIdx = c;
        }
      }
      codes[m] = minIdx;
    }

    return codes;
  }

  /**
   * Reconstruct an approximate vector from its codes.
   *
   * @returns A vector of exactly `dimension` components. The zero padding that
   *   `encode` adds when `dimension` is not divisible by `numSubvectors` is
   *   dropped, so the result lines up with the original vectors.
   */
  decode(codes: Uint8Array): number[] {
    const result: number[] = [];

    for (let m = 0; m < this.numSubvectors; m++) {
      const centroid = this.codebooks[m][codes[m]];
      result.push(...centroid);
    }

    return result.slice(0, this.dimension);
  }

  /** Euclidean (L2) distance between two equal-length vectors. */
  private euclideanDistance(a: number[], b: number[]): number {
    let sum = 0;
    for (let i = 0; i < a.length; i++) {
      const diff = a[i] - b[i];
      sum += diff * diff;
    }
    return Math.sqrt(sum);
  }
}

// ============================================================================
// Evaluation Functions
// ============================================================================

/**
 * Compute cosine similarity.
 *
 * @returns The cosine of the angle between `a` and `b`, or 0 when either
 *   vector has zero magnitude (instead of NaN from a division by zero).
 */
function cosineSimilarity(a: number[], b: number[]): number {
  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    magA += a[i] * a[i];
    magB += b[i] * b[i];
  }
  const denominator = Math.sqrt(magA) * Math.sqrt(magB);
  return denominator === 0 ? 0 : dot / denominator;
}

/**
 * Generate random normalized vectors.
 *
 * @param count - Number of vectors to generate.
 * @param dim - Number of components per vector.
 * @returns `count` vectors with components drawn uniformly from [-1, 1) and
 *   then scaled to unit length.
 */
function generateVectors(count: number, dim: number): number[][] {
  const vectors: number[][] = [];
  for (let i = 0; i < count; i++) {
    const vec = Array.from({ length: dim }, () => Math.random() * 2 - 1);
    const mag = Math.sqrt(vec.reduce((s, v) => s + v * v, 0));
    // A zero magnitude (e.g. dim === 0) cannot be normalized; keep the vector as is.
    vectors.push(mag === 0 ? vec : vec.map(v => v / mag));
  }
  return vectors;
}

/**
 * Rank items by score and return the indices of the first `k`.
 *
 * `Array.prototype.sort` is stable, so items with equal scores keep their
 * original relative order.
 *
 * @param scores - One score per item.
 * @param k - Number of indices to return.
 * @param order - `'desc'` when a higher score is better, `'asc'` when lower is better.
 */
function topKIndices(scores: number[], k: number, order: 'asc' | 'desc'): number[] {
  const indices = scores.map((_, i) => i);
  indices.sort((a, b) => (order === 'desc' ? scores[b] - scores[a] : scores[a] - scores[b]));
  return indices.slice(0, k);
}

/**
 * Find ground truth top-k by exact search.
 *
 * @returns Indices into `vectors` of the `k` vectors most cosine-similar to
 *   `query`, most similar first.
 */
function exactTopK(query: number[], vectors: number[][], k: number): number[] {
  const similarities = vectors.map(v => cosineSimilarity(query, v));
  return topKIndices(similarities, k, 'desc');
}

/**
 * Calculate recall@k.
 *
 * @returns The fraction of `groundTruth` entries that also appear in
 *   `predicted`, or 0 when `groundTruth` is empty (instead of NaN).
 */
function calculateRecall(groundTruth: number[], predicted: number[]): number {
  if (groundTruth.length === 0) {
    return 0;
  }
  const gtSet = new Set(groundTruth);
  const overlap = predicted.filter(p => gtSet.has(p)).length;
  return overlap / groundTruth.length;
}

/**
 * Encode `vectors` with the given quantization method and return a searcher
 * over the encoded data.
 *
 * All encoding happens here, up front, so that timing `search` measures query
 * cost only.
 *
 * @param method - Quantization method to apply.
 * @param vectors - Float vectors to encode.
 * @param dimensions - Number of components per vector.
 */
function buildQuantizedIndex(
  method: QuantizationMethod,
  vectors: number[][],
  dimensions: number
): QuantizedIndex {
  switch (method) {
    case 'none':
      return {
        bytesPerVector: dimensions * 4,
        search: (query, k) => exactTopK(query, vectors, k),
      };

    case 'int8': {
      const stored = vectors.map(v => quantizeInt8(v));
      return {
        bytesPerVector: dimensions * 1 + 8, // quantized + scale + offset
        search: (query, k) => {
          // The query goes through the same lossy round trip as the stored
          // vectors; do it once per query, not once per stored vector.
          const decodedQuery = dequantizeInt8(quantizeInt8(query));
          const similarities = stored.map(s => cosineSimilarity(dequantizeInt8(s), decodedQuery));
          return topKIndices(similarities, k, 'desc');
        },
      };
    }

    case 'int4': {
      const stored = vectors.map(v => ({ ...quantizeInt4(v), originalLength: v.length }));
      return {
        bytesPerVector: Math.ceil(dimensions / 2) + 8,
        search: (query, k) => {
          const decodedQuery = dequantizeInt4(quantizeInt4(query), query.length);
          const similarities = stored.map(s =>
            cosineSimilarity(dequantizeInt4(s, s.originalLength), decodedQuery)
          );
          return topKIndices(similarities, k, 'desc');
        },
      };
    }

    case 'binary': {
      const stored = vectors.map(v => quantizeBinary(v));
      return {
        bytesPerVector: Math.ceil(dimensions / 8),
        search: (query, k) => {
          const queryBits = quantizeBinary(query);
          // Lower Hamming distance = more similar
          const distances = stored.map(bits => hammingDistance(bits, queryBits));
          return topKIndices(distances, k, 'asc');
        },
      };
    }

    case 'pq': {
      const pq = new ProductQuantizer(dimensions, 8);
      const stored = vectors.map(v => pq.encode(v));
      return {
        bytesPerVector: 8, // 8 subvectors, 1 byte each
        search: (query, k) => {
          const similarities = stored.map(codes => cosineSimilarity(pq.decode(codes), query));
          return topKIndices(similarities, k, 'desc');
        },
      };
    }

    default: {
      // Compile-time exhaustiveness check: adding a QuantizationMethod without
      // handling it here is a type error.
      const unhandled: never = method;
      throw new Error(`Unsupported quantization method: ${String(unhandled)}`);
    }
  }
}

// ============================================================================
// Main Example
// ============================================================================

/**
 * Run the quantization comparison and the PostgreSQL storage demo.
 *
 * Logs any failure, attempts to disconnect from PostgreSQL, and then rethrows
 * the failure to the caller.
 */
async function main(): Promise<void> {
  console.log('RuVector PostgreSQL Bridge - Quantization Example');
  console.log('===================================================\n');

  // Percent-encode the parts taken from the environment so that characters
  // such as '@', ':' or '/' in a password do not corrupt the connection URI.
  const { host, port, database, user, password } = config.connection;
  const connectionString =
    `postgresql://${encodeURIComponent(user)}:${encodeURIComponent(password)}` +
    `@${host}:${port}/${encodeURIComponent(database)}`;

  const bridge: RuVectorBridge = createRuVectorBridge({ connectionString });

  try {
    await bridge.connect();
    console.log('Connected to PostgreSQL\n');

    // ========================================================================
    // 1. Generate Test Data
    // ========================================================================
    console.log('1. Generating test data...');
    console.log(' ' + '-'.repeat(50));

    const vectors = generateVectors(config.testVectors, config.dimensions);
    const queries = generateVectors(config.queryVectors, config.dimensions);

    console.log(` Generated ${config.testVectors.toLocaleString()} test vectors`);
    console.log(` Generated ${config.queryVectors} query vectors`);
    console.log(` Dimensions: ${config.dimensions}`);
    console.log();

    // ========================================================================
    // 2. Compute Ground Truth
    // ========================================================================
    console.log('2. Computing ground truth (exact search)...');
    console.log(' ' + '-'.repeat(50));

    const startGT = performance.now();
    const groundTruths = queries.map(q => exactTopK(q, vectors, config.k));
    const gtTime = performance.now() - startGT;

    console.log(` Ground truth computed in ${gtTime.toFixed(2)}ms`);
    console.log(` Average time per query: ${(gtTime / config.queryVectors).toFixed(2)}ms`);
    console.log();

    // ========================================================================
    // 3. Compare Quantization Methods
    // ========================================================================
    console.log('3. Comparing Quantization Methods');
    console.log(' ' + '-'.repeat(70));
    console.log(' Method | Compression | Recall@10 | Query Time | Mem/Vector');
    console.log(' ' + '-'.repeat(70));

    const results: Array<{
      method: string;
      compression: number;
      recall: number;
      queryTimeMs: number;
      bytesPerVector: number;
    }> = [];

    // Test each quantization method
    for (const qConfig of quantizationMethods) {
      // Encoding happens here, before the timer starts.
      const index = buildQuantizedIndex(qConfig.method, vectors, config.dimensions);
      const bytesPerVector = index.bytesPerVector;

      // Measure recall and query time
      const recalls: number[] = [];
      const startQuery = performance.now();

      for (let i = 0; i < queries.length; i++) {
        const predicted = index.search(queries[i], config.k);
        recalls.push(calculateRecall(groundTruths[i], predicted));
      }

      const queryTime = (performance.now() - startQuery) / queries.length;
      const avgRecall = recalls.reduce((a, b) => a + b, 0) / recalls.length;
      const compression = (config.dimensions * 4) / bytesPerVector;

      results.push({
        method: qConfig.name,
        compression,
        recall: avgRecall,
        queryTimeMs: queryTime,
        bytesPerVector,
      });

      console.log(
        ` ${qConfig.name.padEnd(30)} | ` +
          `${compression.toFixed(1).padStart(6)}x | ` +
          `${(avgRecall * 100).toFixed(1).padStart(6)}% | ` +
          `${queryTime.toFixed(2).padStart(8)}ms | ` +
          `${bytesPerVector.toString().padStart(5)} B`
      );
    }
    console.log();

    // ========================================================================
    // 4. Memory Savings Analysis
    // ========================================================================
    console.log('4. Memory Savings Analysis');
    console.log(' ' + '-'.repeat(50));

    const baseMemory = (config.testVectors * config.dimensions * 4) / (1024 * 1024);
    console.log(` Base memory (Float32): ${baseMemory.toFixed(2)} MB`);

    console.log('\n Memory usage by method:');
    results.forEach(r => {
      const memory = (config.testVectors * r.bytesPerVector) / (1024 * 1024);
      const savings = ((baseMemory - memory) / baseMemory) * 100;
      console.log(
        ` ${r.method.padEnd(30)}: ${memory.toFixed(2).padStart(6)} MB ` +
          `(${savings.toFixed(1)}% reduction)`
      );
    });
    console.log();

    // ========================================================================
    // 5. Recall vs Compression Trade-off
    // ========================================================================
    console.log('5. Recall vs Compression Trade-off');
    console.log(' ' + '-'.repeat(50));

    console.log(' Visual representation (Compression -> Recall):');
    console.log();

    // Cap the compression bar so very high ratios (PQ is 384x at 768
    // dimensions) do not print a bar hundreds of characters wide. The exact
    // ratio is still printed next to the bar.
    const maxBarWidth = 64;

    results.forEach(r => {
      const compressionBar = '='.repeat(Math.min(maxBarWidth, Math.floor(r.compression * 2)));
      const recallBar = '*'.repeat(Math.floor(r.recall * 50));
      console.log(` ${r.method.slice(0, 20).padEnd(20)}`);
      console.log(` Compression: ${compressionBar} ${r.compression.toFixed(1)}x`);
      console.log(` Recall: ${recallBar} ${(r.recall * 100).toFixed(1)}%`);
      console.log();
    });

    // ========================================================================
    // 6. Production Recommendations
    // ========================================================================
    console.log('6. Production Recommendations');
    console.log(' ' + '-'.repeat(50));

    console.log('\n Use Case Recommendations:');
    console.log('\n High Accuracy (recall > 99%):');
    console.log(' - Method: Int8 Scalar Quantization');
    console.log(' - Compression: 4x');
    console.log(' - Best for: RAG, semantic search, recommendations');

    console.log('\n Balanced (recall > 95%):');
    console.log(' - Method: Int4 Scalar Quantization');
    console.log(' - Compression: 8x');
    console.log(' - Best for: Large-scale similarity search');

    console.log('\n Maximum Compression (recall > 85%):');
    console.log(' - Method: Binary Quantization');
    console.log(' - Compression: 32x');
    console.log(' - Best for: Candidate generation, first-pass filtering');

    console.log('\n High-Dimensional Data:');
    console.log(' - Method: Product Quantization (PQ)');
    console.log(' - Compression: Variable (8-64x typical)');
    console.log(' - Best for: Embeddings > 512 dimensions');

    // ========================================================================
    // 7. PostgreSQL Integration Notes
    // ========================================================================
    console.log('\n7. PostgreSQL Integration Notes');
    console.log(' ' + '-'.repeat(50));

    console.log('\n pgvector supports:');
    console.log(' - halfvec (Float16): 2x compression, ~99.9% recall');
    console.log(' - sparsevec: For sparse vectors');
    console.log(' - HNSW with quantization: Index-level compression');

    console.log('\n Example SQL for halfvec:');
    console.log(' CREATE TABLE items (');
    console.log(' id bigserial PRIMARY KEY,');
    console.log(' embedding halfvec(768) -- Float16 storage');
    console.log(' );');

    console.log('\n Example SQL for quantized index:');
    console.log(' CREATE INDEX ON items USING hnsw (');
    console.log(' (embedding::halfvec(768)) halfvec_l2_ops');
    console.log(' );');

    // ========================================================================
    // 8. Store Quantized Vectors (Demo)
    // ========================================================================
    console.log('\n8. Storing Vectors with Different Precisions');
    console.log(' ' + '-'.repeat(50));

    // Create collections for different precisions
    const collections = ['vectors_float32', 'vectors_int8_sim'];

    for (const collection of collections) {
      await bridge.createCollection(collection, {
        dimensions: config.dimensions,
        distanceMetric: 'cosine',
        indexType: 'hnsw',
      });
    }

    // Insert sample vectors (never more than were generated, in case
    // config.testVectors is set below 1000)
    const sampleSize = Math.min(1000, vectors.length);
    console.log(`\n Inserting ${sampleSize} vectors to each collection...`);

    // Float32 (original)
    const float32Start = performance.now();
    for (let i = 0; i < sampleSize; i++) {
      await bridge.insert('vectors_float32', {
        id: `float32_${i}`,
        embedding: vectors[i],
        metadata: { precision: 'float32' },
      });
    }
    const float32Time = performance.now() - float32Start;

    // Simulated Int8 (stored as float but simulating quantization overhead)
    const int8Start = performance.now();
    for (let i = 0; i < sampleSize; i++) {
      const q = quantizeInt8(vectors[i]);
      const dequantized = dequantizeInt8(q);
      await bridge.insert('vectors_int8_sim', {
        id: `int8_${i}`,
        embedding: dequantized,
        metadata: { precision: 'int8_simulated', scale: q.scale, offset: q.offset },
      });
    }
    const int8Time = performance.now() - int8Start;

    console.log(` Float32 insert time: ${float32Time.toFixed(2)}ms`);
    console.log(` Int8 (simulated) insert time: ${int8Time.toFixed(2)}ms`);

    // Compare search results
    const testQuery = queries[0];

    const float32Results = await bridge.search('vectors_float32', testQuery, {
      k: 10,
      includeDistance: true,
    });

    const int8Results = await bridge.search('vectors_int8_sim', testQuery, {
      k: 10,
      includeDistance: true,
    });

    console.log('\n Search result comparison (first 5):');
    console.log(' Float32 IDs: ' + float32Results.slice(0, 5).map(r => r.id).join(', '));
    // Rewrite the id prefix so both rows can be compared at a glance.
    console.log(
      ' Int8 (sim) IDs: ' +
        int8Results
          .slice(0, 5)
          .map(r => r.id.replace('int8', 'float32'))
          .join(', ')
    );

    // ========================================================================
    // Done
    // ========================================================================
    console.log('\n' + '='.repeat(55));
    console.log('Quantization example completed!');
    console.log('='.repeat(55));
  } catch (error) {
    console.error('Error:', error);
    throw error;
  } finally {
    // A failing disconnect must not replace the error (if any) that is
    // already propagating out of the try block.
    try {
      await bridge.disconnect();
      console.log('\nDisconnected from PostgreSQL.');
    } catch (disconnectError) {
      console.error('Failed to disconnect from PostgreSQL:', disconnectError);
    }
  }
}

main().catch(() => {
  // main() has already logged the failure; report it to the shell through a
  // non-zero exit code instead of logging it a second time.
  process.exitCode = 1;
});