UNPKG

@ruvector/postgres-cli

Version:

Advanced AI vector database CLI for PostgreSQL - pgvector drop-in replacement with 53+ SQL functions, 39 attention mechanisms, GNN layers, hyperbolic embeddings, and self-learning capabilities

646 lines (645 loc) 28.9 kB
/**
 * RuVector PostgreSQL Client
 * Comprehensive wrapper for PostgreSQL connections with RuVector extension
 *
 * Features:
 * - Connection pooling with configurable limits
 * - Automatic retry with exponential backoff
 * - Batch operations for bulk inserts
 * - SQL injection protection
 * - Input validation
 */
import pg from 'pg';

const { Pool } = pg;

const DEFAULT_POOL_CONFIG = {
  maxConnections: 10,
  idleTimeoutMs: 30000,
  connectionTimeoutMs: 5000,
  statementTimeoutMs: 30000,
};

const DEFAULT_RETRY_CONFIG = {
  maxRetries: 3,
  baseDelayMs: 100,
  maxDelayMs: 5000,
};

/** Longest identifier accepted by validateIdentifier. */
const MAX_IDENTIFIER_LENGTH = 63;

/** Shape of an unquoted SQL identifier: letter/underscore, then letters, digits, underscores. */
const IDENTIFIER_PATTERN = /^[a-zA-Z_][a-zA-Z0-9_]*$/;

/** Upper bound on bind parameters in a single PostgreSQL statement. */
const MAX_BIND_PARAMS = 65535;

/** Bind parameters used per row by insertVectorsBatch (embedding + metadata). */
const PARAMS_PER_VECTOR_ROW = 2;

/** PostgreSQL SQLSTATE codes that queryWithRetry treats as transient. */
const RETRYABLE_ERROR_CODES = new Set([
  '08000', // connection_exception
  '08003', // connection_does_not_exist
  '08006', // connection_failure
  '40001', // serialization_failure
  '40P01', // deadlock_detected
  '57P01', // admin_shutdown
  '57P02', // crash_shutdown
  '57P03', // cannot_connect_now
]);

/** Distance operator per search metric; any other metric falls back to `<#>`. */
const DISTANCE_OPERATORS = new Map([
  ['cosine', '<=>'],
  ['l2', '<->'],
]);

/** SQL function per sparse distance metric. A Map avoids inherited Object.prototype keys. */
const SPARSE_DISTANCE_FUNCTIONS = new Map([
  ['dot', 'ruvector_sparse_dot'],
  ['cosine', 'ruvector_sparse_cosine'],
  ['euclidean', 'ruvector_sparse_euclidean'],
  ['manhattan', 'ruvector_sparse_manhattan'],
]);

// ============================================================================
// Utility Functions
// ============================================================================

/**
 * Validate identifier (table/column name) to prevent SQL injection.
 *
 * @param {string} name - Candidate identifier.
 * @returns {string} The same name, unchanged.
 * @throws {Error} If the name is not a string, has a disallowed character, or is too long.
 */
function validateIdentifier(name) {
  // The typeof guard matters: RegExp#test coerces its argument, so `undefined`,
  // `null`, `true` or `['abc']` would otherwise pass the pattern check.
  if (typeof name !== 'string' || !IDENTIFIER_PATTERN.test(name)) {
    throw new Error(`Invalid identifier: ${String(name)}. Must be alphanumeric with underscores.`);
  }
  if (name.length > MAX_IDENTIFIER_LENGTH) {
    throw new Error(`Identifier too long: ${name}. Max 63 characters.`);
  }
  return name;
}

/**
 * Quote identifier for safe SQL usage.
 *
 * @param {string} name - Identifier; validated before quoting.
 * @returns {string} The identifier wrapped in double quotes.
 * @throws {Error} If the identifier fails validation.
 */
function quoteIdentifier(name) {
  // Validation already rejects quotes; doubling them is kept as defense in depth.
  return `"${validateIdentifier(name).replace(/"/g, '""')}"`;
}

/**
 * Validate that a value is a non-empty array of finite numbers.
 *
 * @param {number[]} vector - Value to check.
 * @param {number} [expectedDim] - Required length, when given.
 * @throws {Error} On any violation.
 */
function validateVector(vector, expectedDim) {
  if (!Array.isArray(vector)) {
    throw new Error('Vector must be an array');
  }
  if (vector.length === 0) {
    throw new Error('Vector cannot be empty');
  }
  if (vector.some((v) => typeof v !== 'number' || !Number.isFinite(v))) {
    throw new Error('Vector must contain only finite numbers');
  }
  if (expectedDim !== undefined && vector.length !== expectedDim) {
    throw new Error(`Vector dimension mismatch: expected ${expectedDim}, got ${vector.length}`);
  }
}

/**
 * Render a numeric array as the `[a,b,c]` text form that is cast to `ruvector`.
 *
 * @param {number[]} vector
 * @returns {string}
 */
function toVectorLiteral(vector) {
  return `[${vector.join(',')}]`;
}

/**
 * Serialize an optional value for a JSON/JSONB parameter.
 *
 * @param {*} value
 * @returns {string|null} JSON text, or null when the value is falsy.
 */
function toJsonParam(value) {
  return value ? JSON.stringify(value) : null;
}

/**
 * Sleep for exponential backoff.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Check if error is retryable, based on its `code` property.
 *
 * @param {*} err - Thrown value; null/undefined is treated as not retryable.
 * @returns {boolean}
 */
function isRetryableError(err) {
  return RETRYABLE_ERROR_CODES.has(err?.code);
}

/**
 * Pooled PostgreSQL client exposing the RuVector extension's SQL functions.
 * Call `connect()` before any query method and `disconnect()` when done.
 */
export class RuVectorClient {
  pool = null;
  connectionString;
  poolConfig;
  retryConfig;

  /**
   * @param {string} connectionString - PostgreSQL connection string.
   * @param {object} [poolConfig] - Overrides for DEFAULT_POOL_CONFIG.
   * @param {object} [retryConfig] - Overrides for DEFAULT_RETRY_CONFIG.
   */
  constructor(connectionString, poolConfig, retryConfig) {
    this.connectionString = connectionString;
    this.poolConfig = { ...DEFAULT_POOL_CONFIG, ...poolConfig };
    this.retryConfig = { ...DEFAULT_RETRY_CONFIG, ...retryConfig };
  }

  /**
   * Create the connection pool and verify that a connection can be opened.
   *
   * @returns {Promise<void>}
   * @throws {Error} If `statementTimeoutMs` is invalid or the probe connection fails;
   *   in the latter case the new pool is closed and the client stays disconnected.
   */
  async connect() {
    const { maxConnections, idleTimeoutMs, connectionTimeoutMs, statementTimeoutMs } = this.poolConfig;
    if (typeof statementTimeoutMs !== 'number' || !Number.isFinite(statementTimeoutMs) || statementTimeoutMs < 0) {
      throw new Error('statementTimeoutMs must be a non-negative finite number');
    }
    // Reconnecting must not orphan the previous pool and its sockets.
    if (this.pool) {
      await this.disconnect();
    }
    const pool = new Pool({
      connectionString: this.connectionString,
      max: maxConnections,
      idleTimeoutMillis: idleTimeoutMs,
      connectionTimeoutMillis: connectionTimeoutMs,
      // Set through the pool config so every pooled connection gets the timeout;
      // a one-off `SET` would only affect the single connection that ran it.
      statement_timeout: statementTimeoutMs,
    });
    try {
      // Test connection
      const client = await pool.connect();
      try {
        await client.query('SELECT 1');
      } finally {
        client.release();
      }
    } catch (err) {
      // Best-effort cleanup; the connection failure is the error worth reporting.
      await pool.end().catch(() => undefined);
      throw err;
    }
    this.pool = pool;
  }

  /**
   * Close the pool, if one is open.
   *
   * @returns {Promise<void>}
   */
  async disconnect() {
    if (this.pool) {
      await this.pool.end();
      this.pool = null;
    }
  }

  /**
   * Execute query with automatic retry on transient errors.
   *
   * Always makes at least one attempt, then up to `retryConfig.maxRetries` retries
   * when the error code is retryable.
   *
   * @param {string} sql - SQL text.
   * @param {Array} [params] - Bind parameters.
   * @returns {Promise<object>} The driver's query result.
   * @throws {Error} If not connected, or the last error once retries are exhausted.
   */
  async queryWithRetry(sql, params) {
    // Captured once so a concurrent disconnect() cannot null it mid-retry.
    const pool = this.pool;
    if (!pool) {
      throw new Error('Not connected to database');
    }
    const { maxRetries, baseDelayMs, maxDelayMs } = this.retryConfig;
    for (let attempt = 0; ; attempt++) {
      try {
        return await pool.query(sql, params);
      } catch (err) {
        // `!(attempt < maxRetries)` also stops when maxRetries is NaN or negative.
        if (!isRetryableError(err) || !(attempt < maxRetries)) {
          throw err;
        }
        // Exponential backoff with jitter
        const delay = Math.min(baseDelayMs * Math.pow(2, attempt) + Math.random() * 100, maxDelayMs);
        await sleep(delay);
      }
    }
  }

  /**
   * Run a query (with retry) and return its rows.
   *
   * @param {string} sql
   * @param {Array} [params]
   * @returns {Promise<object[]>}
   */
  async query(sql, params) {
    const result = await this.queryWithRetry(sql, params);
    return result.rows;
  }

  /**
   * Run a statement (with retry) and discard the result.
   *
   * @param {string} sql
   * @param {Array} [params]
   * @returns {Promise<void>}
   */
  async execute(sql, params) {
    await this.queryWithRetry(sql, params);
  }

  /**
   * Execute multiple statements in a transaction.
   *
   * @param {function(object): Promise<*>} fn - Receives the checked-out client; its
   *   resolved value is returned after COMMIT.
   * @returns {Promise<*>}
   * @throws {Error} If not connected, or whatever BEGIN / `fn` / COMMIT threw (after ROLLBACK).
   */
  async transaction(fn) {
    if (!this.pool) {
      throw new Error('Not connected to database');
    }
    const client = await this.pool.connect();
    let releaseError;
    try {
      await client.query('BEGIN');
      const result = await fn(client);
      await client.query('COMMIT');
      return result;
    } catch (err) {
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr) {
        // Connection state is unknown now; releasing with an error makes the pool
        // discard it. The original failure is still the one reported.
        releaseError = rollbackErr;
      }
      throw err;
    } finally {
      client.release(releaseError);
    }
  }

  // ============================================================================
  // Extension Info
  // ============================================================================

  /**
   * Report the installed extension version, SIMD info and detected features.
   *
   * @returns {Promise<{version: string, features: string[], simd_info: *}>}
   *   `version` is 'unknown' when the extension row is missing.
   */
  async getExtensionInfo() {
    const versionResult = await this.query("SELECT extversion as version FROM pg_extension WHERE extname = 'ruvector'");
    const version = versionResult[0]?.version || 'unknown';

    // Get SIMD info
    let simd_info;
    try {
      const simdResult = await this.query('SELECT ruvector_simd_info()');
      simd_info = simdResult[0]?.ruvector_simd_info;
    } catch {
      // Deliberate best effort: function may not exist
    }

    const features = [];
    const featureChecks = [
      { name: 'Vector Operations', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_l2_distance'" },
      { name: 'HNSW Index', check: "SELECT 1 FROM pg_am WHERE amname = 'hnsw'" },
      { name: 'IVFFlat Index', check: "SELECT 1 FROM pg_am WHERE amname = 'ivfflat'" },
      { name: 'Attention Mechanisms', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_attention_score'" },
      { name: 'GNN Layers', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_gcn_forward'" },
      { name: 'Graph/Cypher', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_cypher'" },
      { name: 'Self-Learning', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_enable_learning'" },
      { name: 'Hyperbolic Embeddings', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_poincare_distance'" },
      { name: 'Sparse Vectors', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_sparse_bm25'" },
      { name: 'Agent Routing', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_route'" },
      { name: 'Quantization', check: "SELECT 1 FROM pg_proc WHERE proname = 'binary_quantize_arr'" },
    ];
    for (const { name, check } of featureChecks) {
      try {
        const result = await this.query(check);
        if (result.length > 0) {
          features.push(name);
        }
      } catch {
        // Deliberate best effort: feature not available
      }
    }
    return { version, features, simd_info };
  }

  /**
   * Install the extension, or update it to the latest available version.
   *
   * @param {boolean} [upgrade=false] - true runs ALTER EXTENSION ... UPDATE.
   * @returns {Promise<void>}
   */
  async installExtension(upgrade = false) {
    if (upgrade) {
      await this.execute('ALTER EXTENSION ruvector UPDATE');
    } else {
      await this.execute('CREATE EXTENSION IF NOT EXISTS ruvector CASCADE');
    }
  }

  /**
   * Fetch `ruvector_memory_stats()`.
   *
   * @returns {Promise<object>} The stats value, or an all-zero record when it is falsy.
   */
  async getMemoryStats() {
    const result = await this.query('SELECT ruvector_memory_stats()');
    return result[0]?.ruvector_memory_stats || {
      index_memory_mb: 0,
      vector_cache_mb: 0,
      quantization_tables_mb: 0,
      total_extension_mb: 0,
    };
  }

  // ============================================================================
  // Vector Operations
  // ============================================================================

  /**
   * Create a vector table (if absent) plus an index on `id`.
   *
   * @param {string} name - Table name (validated identifier).
   * @param {number} dimensions - Integer in [1, 65535]; stored as the default of
   *   the `dimensions` column.
   * @param {string} [indexType='hnsw'] - Currently unused; kept for interface compatibility.
   * @returns {Promise<void>}
   * @throws {Error} On an invalid identifier or dimension count.
   */
  async createVectorTable(name, dimensions, indexType = 'hnsw') {
    const safeName = quoteIdentifier(name);
    const safeIdxName = quoteIdentifier(`${name}_id_idx`);
    // isInteger also rejects NaN and strings, which the bare range test let through.
    if (!Number.isInteger(dimensions) || dimensions < 1 || dimensions > 65535) {
      throw new Error('Dimensions must be an integer between 1 and 65535');
    }
    // Use ruvector type (native RuVector extension type)
    // ruvector is a variable-length type, dimensions stored in metadata.
    // DDL statements cannot take bind parameters, so the validated integer is inlined.
    await this.execute(`
      CREATE TABLE IF NOT EXISTS ${safeName} (
        id SERIAL PRIMARY KEY,
        embedding ruvector,
        dimensions INT DEFAULT ${dimensions},
        metadata JSONB,
        created_at TIMESTAMPTZ DEFAULT NOW()
      )
    `);
    // Note: HNSW/IVFFlat indexes require additional index implementation
    // For now, create a simple btree index on id for fast lookups
    await this.execute(`
      CREATE INDEX IF NOT EXISTS ${safeIdxName}
      ON ${safeName} (id)
    `);
  }

  /**
   * Insert one vector.
   *
   * @param {string} table - Table name (validated identifier).
   * @param {number[]} vector - Non-empty array of finite numbers.
   * @param {object} [metadata] - Stored as JSON; NULL when falsy.
   * @returns {Promise<number>} The new row's id.
   */
  async insertVector(table, vector, metadata) {
    validateVector(vector);
    const safeName = quoteIdentifier(table);
    const result = await this.query(
      `INSERT INTO ${safeName} (embedding, metadata) VALUES ($1::ruvector, $2) RETURNING id`,
      [toVectorLiteral(vector), toJsonParam(metadata)],
    );
    return result[0].id;
  }

  /**
   * Batch insert vectors using one multi-row INSERT per batch.
   *
   * Batches are separate statements, so a database error in a later batch does not
   * undo earlier ones. All items are validated before the first INSERT.
   *
   * @param {string} table - Table name (validated identifier).
   * @param {Array<{vector: number[], metadata?: object}>} vectors - Rows to insert.
   * @param {number} [batchSize=100] - Positive integer; capped so a statement stays
   *   within the bind-parameter limit.
   * @returns {Promise<number[]>} Ids of the inserted rows, in statement order.
   * @throws {Error} On an invalid table, `vectors`, `batchSize`, or vector.
   */
  async insertVectorsBatch(table, vectors, batchSize = 100) {
    const safeName = quoteIdentifier(table);
    if (!Array.isArray(vectors)) {
      throw new Error('Vectors must be an array');
    }
    // A zero or negative step would never advance the loop below.
    if (!Number.isInteger(batchSize) || batchSize < 1) {
      throw new Error('Batch size must be a positive integer');
    }
    const rowsPerStatement = Math.min(batchSize, Math.floor(MAX_BIND_PARAMS / PARAMS_PER_VECTOR_ROW));
    // Validate everything up front so a malformed late item cannot leave a partial insert.
    for (const item of vectors) {
      validateVector(item.vector);
    }
    const ids = [];
    // Process in batches
    for (let i = 0; i < vectors.length; i += rowsPerStatement) {
      const batch = vectors.slice(i, i + rowsPerStatement);
      // Build multi-row INSERT
      const values = [];
      const placeholders = [];
      batch.forEach((item, idx) => {
        const base = idx * PARAMS_PER_VECTOR_ROW;
        placeholders.push(`($${base + 1}::ruvector, $${base + 2})`);
        values.push(toVectorLiteral(item.vector), toJsonParam(item.metadata));
      });
      const result = await this.query(
        `INSERT INTO ${safeName} (embedding, metadata) VALUES ${placeholders.join(', ')} RETURNING id`,
        values,
      );
      ids.push(...result.map((r) => r.id));
    }
    return ids;
  }

  /**
   * Return the rows closest to a query vector.
   *
   * @param {string} table - Table name (validated identifier).
   * @param {number[]} query - Query vector.
   * @param {number} [topK=10] - Row limit.
   * @param {string} [metric='cosine'] - 'cosine' uses `<=>`, 'l2' uses `<->`, and
   *   any other value uses `<#>`.
   * @returns {Promise<Array<{id: number, distance: number, metadata: *}>>}
   */
  async searchVectors(table, query, topK = 10, metric = 'cosine') {
    validateVector(query);
    const safeName = quoteIdentifier(table);
    const distanceOp = DISTANCE_OPERATORS.get(metric) ?? '<#>';
    const results = await this.query(
      `SELECT id, embedding ${distanceOp} $1::ruvector as distance, metadata FROM ${safeName} ORDER BY embedding ${distanceOp} $1::ruvector LIMIT $2`,
      [toVectorLiteral(query), topK],
    );
    return results;
  }

  // ============================================================================
  // Direct Distance Functions (use available SQL functions)
  // ============================================================================

  /**
   * Compute cosine distance using array-based function (available in current SQL).
   *
   * @param {number[]} a
   * @param {number[]} b - Must have the same length as `a`.
   * @returns {Promise<number>}
   */
  async cosineDistanceArr(a, b) {
    validateVector(a);
    validateVector(b, a.length);
    const result = await this.query('SELECT cosine_distance_arr($1::real[], $2::real[])', [a, b]);
    return result[0].cosine_distance_arr;
  }

  /**
   * Compute L2 distance using array-based function (available in current SQL).
   *
   * @param {number[]} a
   * @param {number[]} b - Must have the same length as `a`.
   * @returns {Promise<number>}
   */
  async l2DistanceArr(a, b) {
    validateVector(a);
    validateVector(b, a.length);
    const result = await this.query('SELECT l2_distance_arr($1::real[], $2::real[])', [a, b]);
    return result[0].l2_distance_arr;
  }

  /**
   * Compute inner product using array-based function (available in current SQL).
   *
   * @param {number[]} a
   * @param {number[]} b - Must have the same length as `a`.
   * @returns {Promise<number>}
   */
  async innerProductArr(a, b) {
    validateVector(a);
    validateVector(b, a.length);
    const result = await this.query('SELECT inner_product_arr($1::real[], $2::real[])', [a, b]);
    return result[0].inner_product_arr;
  }

  /**
   * Normalize a vector using array-based function (available in current SQL).
   *
   * @param {number[]} v
   * @returns {Promise<number[]>}
   */
  async vectorNormalize(v) {
    validateVector(v);
    const result = await this.query('SELECT vector_normalize($1::real[])', [v]);
    return result[0].vector_normalize;
  }

  // ============================================================================
  // Sparse Vector Operations
  // ============================================================================

  /**
   * Build a sparse vector via `ruvector_to_sparse`.
   *
   * @param {number[]} indices - Passed as `int[]`.
   * @param {number[]} values - Passed as `real[]`.
   * @param {number} dim - Third argument of the SQL function.
   * @returns {Promise<*>} The function's result.
   */
  async createSparseVector(indices, values, dim) {
    const result = await this.query('SELECT ruvector_to_sparse($1::int[], $2::real[], $3)', [indices, values, dim]);
    return result[0].ruvector_to_sparse;
  }

  /**
   * Compute a distance between two sparse vectors.
   *
   * @param {*} a - Value cast to `sparsevec`.
   * @param {*} b - Value cast to `sparsevec`.
   * @param {string} metric - One of 'dot', 'cosine', 'euclidean', 'manhattan'.
   * @returns {Promise<number>}
   * @throws {Error} On an unknown metric (the function name is spliced into SQL, so
   *   only known names may reach it).
   */
  async sparseDistance(a, b, metric) {
    const funcName = SPARSE_DISTANCE_FUNCTIONS.get(metric);
    if (funcName === undefined) {
      const allowed = [...SPARSE_DISTANCE_FUNCTIONS.keys()].join(', ');
      throw new Error(`Unknown sparse distance metric: ${String(metric)}. Expected one of: ${allowed}.`);
    }
    const result = await this.query(`SELECT ${funcName}($1::sparsevec, $2::sparsevec) as distance`, [a, b]);
    return result[0].distance;
  }

  /**
   * Score a document against a query with `ruvector_sparse_bm25`.
   *
   * @param {*} query - Value cast to `sparsevec`.
   * @param {*} doc - Value cast to `sparsevec`.
   * @param {number} docLen
   * @param {number} avgDocLen
   * @param {number} [k1=1.2]
   * @param {number} [b=0.75]
   * @returns {Promise<number>}
   */
  async sparseBM25(query, doc, docLen, avgDocLen, k1 = 1.2, b = 0.75) {
    const result = await this.query(
      'SELECT ruvector_sparse_bm25($1::sparsevec, $2::sparsevec, $3, $4, $5, $6) as score',
      [query, doc, docLen, avgDocLen, k1, b],
    );
    return result[0].score;
  }

  /**
   * Apply `ruvector_sparse_top_k` and report non-zero counts before and after.
   *
   * @param {*} sparse - Value cast to `sparsevec`.
   * @param {number} k
   * @returns {Promise<{vector: string, nnz: number, originalNnz: number, newNnz: number}>}
   *   `nnz` and `newNnz` are the same value.
   */
  async sparseTopK(sparse, k) {
    const originalNnz = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [sparse]);
    const result = await this.query('SELECT ruvector_sparse_top_k($1::sparsevec, $2)::text as result', [sparse, k]);
    const newNnzResult = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [result[0].result]);
    return {
      vector: result[0].result,
      nnz: newNnzResult[0].nnz,
      originalNnz: originalNnz[0].nnz,
      newNnz: newNnzResult[0].nnz,
    };
  }

  /**
   * Apply `ruvector_sparse_prune` and report non-zero counts before and after.
   *
   * @param {*} sparse - Value cast to `sparsevec`.
   * @param {number} threshold
   * @returns {Promise<{vector: string, nnz: number, originalNnz: number, newNnz: number}>}
   *   `nnz` and `newNnz` are the same value.
   */
  async sparsePrune(sparse, threshold) {
    const originalNnz = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [sparse]);
    const result = await this.query('SELECT ruvector_sparse_prune($1::sparsevec, $2)::text as result', [sparse, threshold]);
    const newNnzResult = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [result[0].result]);
    return {
      vector: result[0].result,
      nnz: newNnzResult[0].nnz,
      originalNnz: originalNnz[0].nnz,
      newNnz: newNnzResult[0].nnz,
    };
  }

  /**
   * Convert a dense array with `ruvector_dense_to_sparse`.
   *
   * @param {number[]} dense - Passed as `real[]`.
   * @returns {Promise<{vector: string, nnz: number}>}
   */
  async denseToSparse(dense) {
    const result = await this.query('SELECT ruvector_dense_to_sparse($1::real[])::text as result', [dense]);
    const nnzResult = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [result[0].result]);
    return {
      vector: result[0].result,
      nnz: nnzResult[0].nnz,
    };
  }

  /**
   * Convert a sparse vector with `ruvector_sparse_to_dense`.
   *
   * @param {*} sparse - Value cast to `sparsevec`.
   * @returns {Promise<*>}
   */
  async sparseToDense(sparse) {
    const result = await this.query('SELECT ruvector_sparse_to_dense($1::sparsevec) as result', [sparse]);
    return result[0].result;
  }

  /**
   * Report dimension, non-zero count, norm and sparsity percentage.
   *
   * @param {*} sparse - Value cast to `sparsevec`.
   * @returns {Promise<{dim: number, nnz: number, norm: number, sparsity: number}>}
   *   `sparsity` is `(1 - nnz / dim) * 100`, or 0 when `dim` is not positive.
   */
  async sparseInfo(sparse) {
    const result = await this.query(
      `SELECT ruvector_sparse_dim($1::sparsevec) as dim, ruvector_sparse_nnz($1::sparsevec) as nnz, ruvector_sparse_norm($1::sparsevec) as norm`,
      [sparse],
    );
    const { dim, nnz, norm } = result[0];
    return {
      dim,
      nnz,
      norm,
      // Guard the division: dim of 0 would otherwise produce NaN or -Infinity.
      sparsity: Number(dim) > 0 ? (1 - nnz / dim) * 100 : 0,
    };
  }

  // ============================================================================
  // Hyperbolic Operations
  // ============================================================================

  /**
   * Call `ruvector_poincare_distance`.
   *
   * @param {number[]} a
   * @param {number[]} b
   * @param {number} [curvature=-1.0]
   * @returns {Promise<number>}
   */
  async poincareDistance(a, b, curvature = -1.0) {
    const result = await this.query('SELECT ruvector_poincare_distance($1::real[], $2::real[], $3) as distance', [a, b, curvature]);
    return result[0].distance;
  }

  /**
   * Call `ruvector_lorentz_distance`.
   *
   * @param {number[]} a
   * @param {number[]} b
   * @param {number} [curvature=-1.0]
   * @returns {Promise<number>}
   */
  async lorentzDistance(a, b, curvature = -1.0) {
    const result = await this.query('SELECT ruvector_lorentz_distance($1::real[], $2::real[], $3) as distance', [a, b, curvature]);
    return result[0].distance;
  }

  /**
   * Call `ruvector_mobius_add`.
   *
   * @param {number[]} a
   * @param {number[]} b
   * @param {number} [curvature=-1.0]
   * @returns {Promise<*>}
   */
  async mobiusAdd(a, b, curvature = -1.0) {
    const result = await this.query('SELECT ruvector_mobius_add($1::real[], $2::real[], $3) as result', [a, b, curvature]);
    return result[0].result;
  }

  /**
   * Call `ruvector_exp_map`.
   *
   * @param {number[]} base
   * @param {number[]} tangent
   * @param {number} [curvature=-1.0]
   * @returns {Promise<*>}
   */
  async expMap(base, tangent, curvature = -1.0) {
    const result = await this.query('SELECT ruvector_exp_map($1::real[], $2::real[], $3) as result', [base, tangent, curvature]);
    return result[0].result;
  }

  /**
   * Call `ruvector_log_map`.
   *
   * @param {number[]} base
   * @param {number[]} target
   * @param {number} [curvature=-1.0]
   * @returns {Promise<*>}
   */
  async logMap(base, target, curvature = -1.0) {
    const result = await this.query('SELECT ruvector_log_map($1::real[], $2::real[], $3) as result', [base, target, curvature]);
    return result[0].result;
  }

  /**
   * Call `ruvector_poincare_to_lorentz`.
   *
   * @param {number[]} poincare
   * @param {number} [curvature=-1.0]
   * @returns {Promise<*>}
   */
  async poincareToLorentz(poincare, curvature = -1.0) {
    const result = await this.query('SELECT ruvector_poincare_to_lorentz($1::real[], $2) as result', [poincare, curvature]);
    return result[0].result;
  }

  /**
   * Call `ruvector_lorentz_to_poincare`.
   *
   * @param {number[]} lorentz
   * @param {number} [curvature=-1.0]
   * @returns {Promise<*>}
   */
  async lorentzToPoincare(lorentz, curvature = -1.0) {
    const result = await this.query('SELECT ruvector_lorentz_to_poincare($1::real[], $2) as result', [lorentz, curvature]);
    return result[0].result;
  }

  /**
   * Call `ruvector_minkowski_dot`.
   *
   * @param {number[]} a
   * @param {number[]} b
   * @returns {Promise<*>}
   */
  async minkowskiDot(a, b) {
    const result = await this.query('SELECT ruvector_minkowski_dot($1::real[], $2::real[]) as result', [a, b]);
    return result[0].result;
  }

  // ============================================================================
  // Quantization Operations
  // ============================================================================

  /**
   * Call `binary_quantize_arr`.
   *
   * @param {number[]} vector
   * @returns {Promise<*>}
   */
  async binaryQuantize(vector) {
    const result = await this.query('SELECT binary_quantize_arr($1::real[]) as result', [vector]);
    return result[0].result;
  }

  /**
   * Call `scalar_quantize_arr`.
   *
   * @param {number[]} vector
   * @returns {Promise<*>}
   */
  async scalarQuantize(vector) {
    const result = await this.query('SELECT scalar_quantize_arr($1::real[]) as result', [vector]);
    return result[0].result;
  }

  /**
   * Alias for getMemoryStats().
   *
   * @returns {Promise<object>}
   */
  async quantizationStats() {
    return this.getMemoryStats();
  }

  // ============================================================================
  // Attention Operations
  // ============================================================================

  /**
   * Run an attention computation in the database.
   *
   * 'multi_head' and 'flash' call their SQL functions with a fixed fourth argument
   * (4 and 64 respectively). Any other type calls `ruvector_attention_scores` with
   * 'scaled_dot'; that path ignores `values` and returns the scores as `output`.
   *
   * @param {number[]} query
   * @param {number[][]} keys
   * @param {number[][]} values
   * @param {string} [type='scaled_dot']
   * @returns {Promise<{output: *}>}
   */
  async computeAttention(query, keys, values, type = 'scaled_dot') {
    let funcName;
    let params;
    if (type === 'multi_head') {
      funcName = 'ruvector_multi_head_attention';
      params = [query, keys, values, 4];
    } else if (type === 'flash') {
      funcName = 'ruvector_flash_attention';
      params = [query, keys, values, 64];
    } else {
      // For scaled_dot, compute attention scores directly
      const scoreRows = await this.query(
        'SELECT ruvector_attention_scores($1::real[], $2::real[][], $3) as scores',
        [query, keys, 'scaled_dot'],
      );
      return { output: scoreRows[0].scores };
    }
    const result = await this.query(`SELECT ${funcName}($1::real[], $2::real[][], $3::real[][], $4) as output`, params);
    return { output: result[0].output };
  }

  /**
   * List names returned by `ruvector_attention_types()`.
   *
   * @returns {Promise<string[]>}
   */
  async listAttentionTypes() {
    const result = await this.query('SELECT name FROM ruvector_attention_types()');
    return result.map((r) => r.name);
  }

  // ============================================================================
  // GNN Operations
  // ============================================================================

  /**
   * Upsert a layer configuration row into `ruvector_gnn_layers`, keyed by name.
   *
   * @param {string} name
   * @param {string} type
   * @param {number} inputDim
   * @param {number} outputDim
   * @returns {Promise<void>}
   */
  async createGnnLayer(name, type, inputDim, outputDim) {
    // Store layer config (GNN layers are stateless, config is for reference)
    await this.execute(
      `INSERT INTO ruvector_gnn_layers (name, type, input_dim, output_dim) VALUES ($1, $2, $3, $4) ON CONFLICT (name) DO UPDATE SET type = $2, input_dim = $3, output_dim = $4`,
      [name, type, inputDim, outputDim],
    );
  }

  /**
   * Run a GNN forward pass.
   *
   * 'sage' calls `ruvector_graphsage_forward` with a literal fifth argument of 10;
   * any other layer type calls `ruvector_gcn_forward` with NULL as fourth argument.
   *
   * @param {string} layerType
   * @param {number[][]} features
   * @param {number[]} src
   * @param {number[]} dst
   * @param {number} outDim
   * @returns {Promise<*>}
   */
  async gnnForward(layerType, features, src, dst, outDim) {
    const sql = layerType === 'sage'
      ? 'SELECT ruvector_graphsage_forward($1::real[][], $2::int[], $3::int[], $4, 10) as result'
      : 'SELECT ruvector_gcn_forward($1::real[][], $2::int[], $3::int[], NULL, $4) as result';
    const result = await this.query(sql, [features, src, dst, outDim]);
    return result[0].result;
  }

  // ============================================================================
  // Graph Operations
  // ============================================================================

  /**
   * Call `ruvector_create_graph`.
   *
   * @param {string} name
   * @returns {Promise<*>}
   */
  async createGraph(name) {
    const result = await this.query('SELECT ruvector_create_graph($1) as result', [name]);
    return result[0].result;
  }

  /**
   * Run a query through `ruvector_cypher`.
   *
   * @param {string} graphName
   * @param {string} query
   * @param {object} [params] - Sent as JSON text; NULL when falsy.
   * @returns {Promise<object[]>} Raw result rows.
   */
  async cypherQuery(graphName, query, params) {
    const result = await this.query('SELECT ruvector_cypher($1, $2, $3)', [graphName, query, toJsonParam(params)]);
    return result;
  }

  /**
   * Call `ruvector_add_node`.
   *
   * @param {string} graphName
   * @param {*} labels
   * @param {object} properties - Sent as JSONB.
   * @returns {Promise<*>}
   */
  async addNode(graphName, labels, properties) {
    const result = await this.query('SELECT ruvector_add_node($1, $2, $3::jsonb) as result', [graphName, labels, JSON.stringify(properties)]);
    return result[0].result;
  }

  /**
   * Call `ruvector_add_edge`.
   *
   * @param {string} graphName
   * @param {*} sourceId
   * @param {*} targetId
   * @param {string} edgeType
   * @param {object} properties - Sent as JSONB.
   * @returns {Promise<*>}
   */
  async addEdge(graphName, sourceId, targetId, edgeType, properties) {
    const result = await this.query(
      'SELECT ruvector_add_edge($1, $2, $3, $4, $5::jsonb) as result',
      [graphName, sourceId, targetId, edgeType, JSON.stringify(properties)],
    );
    return result[0].result;
  }

  /**
   * Call `ruvector_shortest_path`.
   *
   * @param {string} graphName
   * @param {*} startId
   * @param {*} endId
   * @param {number} maxHops
   * @returns {Promise<*>}
   */
  async shortestPath(graphName, startId, endId, maxHops) {
    const result = await this.query('SELECT ruvector_shortest_path($1, $2, $3, $4) as result', [graphName, startId, endId, maxHops]);
    return result[0].result;
  }

  /**
   * Call `ruvector_graph_stats`.
   *
   * @param {string} graphName
   * @returns {Promise<*>}
   */
  async graphStats(graphName) {
    const result = await this.query('SELECT ruvector_graph_stats($1) as result', [graphName]);
    return result[0].result;
  }

  /**
   * List graphs returned by `ruvector_list_graphs()`.
   *
   * @returns {Promise<Array>}
   */
  async listGraphs() {
    const result = await this.query('SELECT unnest(ruvector_list_graphs()) as graph');
    return result.map((r) => r.graph);
  }

  /**
   * Call `ruvector_delete_graph`.
   *
   * @param {string} graphName
   * @returns {Promise<*>}
   */
  async deleteGraph(graphName) {
    const result = await this.query('SELECT ruvector_delete_graph($1) as result', [graphName]);
    return result[0].result;
  }

  // ============================================================================
  // Routing/Agent Operations
  // ============================================================================

  /**
   * Call `ruvector_register_agent`.
   *
   * @param {string} name
   * @param {string} agentType
   * @param {*} capabilities
   * @param {number} costPerRequest
   * @param {number} avgLatencyMs
   * @param {number} qualityScore
   * @returns {Promise<*>}
   */
  async registerAgent(name, agentType, capabilities, costPerRequest, avgLatencyMs, qualityScore) {
    const result = await this.query(
      'SELECT ruvector_register_agent($1, $2, $3, $4, $5, $6) as result',
      [name, agentType, capabilities, costPerRequest, avgLatencyMs, qualityScore],
    );
    return result[0].result;
  }

  /**
   * Call `ruvector_register_agent_full` with a JSONB config.
   *
   * @param {object} config
   * @returns {Promise<*>}
   */
  async registerAgentFull(config) {
    const result = await this.query('SELECT ruvector_register_agent_full($1::jsonb) as result', [JSON.stringify(config)]);
    return result[0].result;
  }

  /**
   * Call `ruvector_update_agent_metrics`.
   *
   * @param {string} name
   * @param {number} latencyMs
   * @param {boolean} success
   * @param {number} [quality] - Sent as NULL when null/undefined.
   * @returns {Promise<*>}
   */
  async updateAgentMetrics(name, latencyMs, success, quality) {
    const result = await this.query('SELECT ruvector_update_agent_metrics($1, $2, $3, $4) as result', [name, latencyMs, success, quality ?? null]);
    return result[0].result;
  }

  /**
   * Call `ruvector_remove_agent`.
   *
   * @param {string} name
   * @returns {Promise<*>}
   */
  async removeAgent(name) {
    const result = await this.query('SELECT ruvector_remove_agent($1) as result', [name]);
    return result[0].result;
  }

  /**
   * Call `ruvector_set_agent_active`.
   *
   * @param {string} name
   * @param {boolean} isActive
   * @returns {Promise<*>}
   */
  async setAgentActive(name, isActive) {
    const result = await this.query('SELECT ruvector_set_agent_active($1, $2) as result', [name, isActive]);
    return result[0].result;
  }

  /**
   * Call `ruvector_route`.
   *
   * @param {number[]} embedding
   * @param {string} [optimizeFor='balanced']
   * @param {object} [constraints] - Sent as JSONB; NULL when falsy.
   * @returns {Promise<*>}
   */
  async route(embedding, optimizeFor = 'balanced', constraints) {
    const result = await this.query(
      'SELECT ruvector_route($1::real[], $2, $3::jsonb) as result',
      [embedding, optimizeFor, toJsonParam(constraints)],
    );
    return result[0].result;
  }

  /**
   * Return all rows from `ruvector_list_agents()`.
   *
   * @returns {Promise<object[]>}
   */
  async listAgents() {
    const result = await this.query('SELECT * FROM ruvector_list_agents()');
    return result;
  }

  /**
   * Call `ruvector_get_agent`.
   *
   * @param {string} name
   * @returns {Promise<*>}
   */
  async getAgent(name) {
    const result = await this.query('SELECT ruvector_get_agent($1) as result', [name]);
    return result[0].result;
  }

  /**
   * Return rows from `ruvector_find_agents_by_capability`.
   *
   * @param {string} capability
   * @param {number} [limit=10]
   * @returns {Promise<object[]>}
   */
  async findAgentsByCapability(capability, limit = 10) {
    const result = await this.query('SELECT * FROM ruvector_find_agents_by_capability($1, $2)', [capability, limit]);
    return result;
  }

  /**
   * Call `ruvector_routing_stats()`.
   *
   * @returns {Promise<*>}
   */
  async routingStats() {
    const result = await this.query('SELECT ruvector_routing_stats() as result');
    return result[0].result;
  }

  /**
   * Call `ruvector_clear_agents()`.
   *
   * @returns {Promise<*>}
   */
  async clearAgents() {
    const result = await this.query('SELECT ruvector_clear_agents() as result');
    return result[0].result;
  }

  // ============================================================================
  // Learning Operations
  // ============================================================================

  /**
   * Call `ruvector_enable_learning`.
   *
   * @param {string} tableName - Passed as a bind parameter.
   * @param {object} [config] - Sent as JSONB; NULL when falsy.
   * @returns {Promise<*>}
   */
  async enableLearning(tableName, config) {
    const result = await this.query('SELECT ruvector_enable_learning($1, $2::jsonb) as result', [tableName, toJsonParam(config)]);
    return result[0].result;
  }

  /**
   * Call `ruvector_record_feedback`.
   *
   * @param {string} tableName
   * @param {number[]} queryVector
   * @param {number[]} relevantIds - Passed as `bigint[]`.
   * @param {number[]} irrelevantIds - Passed as `bigint[]`.
   * @returns {Promise<*>}
   */
  async recordFeedback(tableName, queryVector, relevantIds, irrelevantIds) {
    const result = await this.query(
      'SELECT ruvector_record_feedback($1, $2::real[], $3::bigint[], $4::bigint[]) as result',
      [tableName, queryVector, relevantIds, irrelevantIds],
    );
    return result[0].result;
  }

  /**
   * Call `ruvector_learning_stats`.
   *
   * @param {string} tableName
   * @returns {Promise<*>}
   */
  async learningStats(tableName) {
    const result = await this.query('SELECT ruvector_learning_stats($1) as result', [tableName]);
    return result[0].result;
  }

  /**
   * Call `ruvector_auto_tune`.
   *
   * @param {string} tableName
   * @param {string} [optimizeFor='balanced']
   * @param {number[][]} [sampleQueries] - Sent as NULL when null/undefined.
   * @returns {Promise<*>}
   */
  async autoTune(tableName, optimizeFor = 'balanced', sampleQueries) {
    const result = await this.query(
      'SELECT ruvector_auto_tune($1, $2, $3::real[][]) as result',
      [tableName, optimizeFor, sampleQueries ?? null],
    );
    return result[0].result;
  }

  /**
   * Call `ruvector_extract_patterns`.
   *
   * @param {string} tableName
   * @param {number} [numClusters=10]
   * @returns {Promise<*>}
   */
  async extractPatterns(tableName, numClusters = 10) {
    const result = await this.query('SELECT ruvector_extract_patterns($1, $2) as result', [tableName, numClusters]);
    return result[0].result;
  }

  /**
   * Call `ruvector_get_search_params`.
   *
   * @param {string} tableName
   * @param {number[]} queryVector
   * @returns {Promise<*>}
   */
  async getSearchParams(tableName, queryVector) {
    const result = await this.query('SELECT ruvector_get_search_params($1, $2::real[]) as result', [tableName, queryVector]);
    return result[0].result;
  }

  /**
   * Call `ruvector_clear_learning`.
   *
   * @param {string} tableName
   * @returns {Promise<*>}
   */
  async clearLearning(tableName) {
    const result = await this.query('SELECT ruvector_clear_learning($1) as result', [tableName]);
    return result[0].result;
  }

  // Legacy methods for backward compatibility

  /**
   * Legacy placeholder. Performs no training and ignores both arguments; always
   * resolves to the same fixed numbers, which are NOT measured metrics.
   *
   * @param {*} data - Unused.
   * @param {number} [epochs=10] - Unused.
   * @returns {Promise<{loss: number, accuracy: number}>}
   */
  async trainFromTrajectories(data, epochs = 10) {
    // TODO: map this onto the learning system; until then the values are constants.
    return { loss: 0.1, accuracy: 0.9 };
  }

  /**
   * Legacy placeholder. Resolves to its input unchanged.
   *
   * @param {*} input
   * @returns {Promise<*>}
   */
  async predict(input) {
    return input; // Placeholder
  }

  // ============================================================================
  // Benchmark Operations
  // ============================================================================

  /**
   * Time a small client-driven benchmark.
   *
   * For type 'vector' or 'all', generates `min(size, 100)` random vectors and issues
   * one `cosine_distance_arr` query per pair among the first 10 of them. Timings
   * therefore include a network round trip per pair.
   *
   * @param {string} type
   * @param {number} size
   * @param {number} dimensions - Length of each generated vector.
   * @returns {Promise<object>} `{ type, size, dimensions, total_time_ms }`, plus
   *   `vector_time_ms` when the vector benchmark ran.
   */
  async runBenchmark(type, size, dimensions) {
    // Benchmarks are run client-side with timing
    const start = Date.now();
    const results = { type, size, dimensions };
    if (type === 'vector' || type === 'all') {
      const vectorStart = Date.now();
      // Generate random vectors
      const vectors = Array.from({ length: Math.min(size, 100) }, () =>
        Array.from({ length: dimensions }, () => Math.random()));
      // Compute pairwise distances
      const pairLimit = Math.min(vectors.length, 10);
      for (let i = 0; i < pairLimit; i++) {
        for (let j = i + 1; j < pairLimit; j++) {
          await this.query('SELECT cosine_distance_arr($1::real[], $2::real[])', [vectors[i], vectors[j]]);
        }
      }
      results.vector_time_ms = Date.now() - vectorStart;
    }
    results.total_time_ms = Date.now() - start;
    return results;
  }
}

export default RuVectorClient;