@ruvector/postgres-cli
Version:
Advanced AI vector database CLI for PostgreSQL - pgvector drop-in replacement with 53+ SQL functions, 39 attention mechanisms, GNN layers, hyperbolic embeddings, and self-learning capabilities
646 lines (645 loc) • 28.9 kB
JavaScript
/**
* RuVector PostgreSQL Client
* Comprehensive wrapper for PostgreSQL connections with RuVector extension
*
* Features:
* - Connection pooling with configurable limits
* - Automatic retry with exponential backoff
* - Batch operations for bulk inserts
* - SQL injection protection
* - Input validation
*/
import pg from 'pg';
const { Pool } = pg;
/**
 * Default connection-pool settings. Caller-supplied pool options are spread
 * over these, so every key here is a fallback.
 *
 * Frozen so that an accidental assignment cannot silently change the
 * defaults of every client created afterwards.
 */
const DEFAULT_POOL_CONFIG = Object.freeze({
  // Upper bound on pooled connections (handed to pg as `max`).
  maxConnections: 10,
  // Handed to pg as `idleTimeoutMillis`.
  idleTimeoutMs: 30000,
  // Handed to pg as `connectionTimeoutMillis`.
  connectionTimeoutMs: 5000,
  // Value used for PostgreSQL's statement_timeout, in milliseconds.
  statementTimeoutMs: 30000,
});
/**
 * Default retry policy for transient query failures. Caller-supplied retry
 * options are spread over these. Frozen for the same reason as the pool
 * defaults.
 */
const DEFAULT_RETRY_CONFIG = Object.freeze({
  // Number of retries after the first failed attempt.
  maxRetries: 3,
  // Backoff starts here and doubles with each attempt.
  baseDelayMs: 100,
  // Ceiling applied to the computed backoff delay.
  maxDelayMs: 5000,
});
// ============================================================================
// Utility Functions
// ============================================================================
/**
 * Validate a SQL identifier (table/column name) to prevent SQL injection.
 *
 * Only ASCII letters, digits and underscores are accepted, the first
 * character may not be a digit, and the length is capped at 63 characters.
 *
 * @param {string} name - Candidate identifier.
 * @returns {string} The same string, unchanged, when it is valid.
 * @throws {Error} If `name` is not a string, does not match the allowed
 *   pattern, or is longer than 63 characters.
 */
function validateIdentifier(name) {
  // RegExp.prototype.test() coerces its argument to a string, so without the
  // typeof guard values such as undefined ("undefined"), null ("null") or
  // ['users'] ("users") would pass the pattern check.
  if (typeof name !== 'string' || !/^[a-zA-Z_][a-zA-Z0-9_]*$/.test(name)) {
    throw new Error(`Invalid identifier: ${String(name)}. Must be alphanumeric with underscores.`);
  }
  if (name.length > 63) {
    throw new Error(`Identifier too long: ${name}. Max 63 characters.`);
  }
  return name;
}
/**
 * Produce a double-quoted SQL identifier.
 *
 * The name is first passed through validateIdentifier (which throws on
 * anything it rejects); any double quote in the validated name is then
 * doubled before the result is wrapped in double quotes.
 *
 * @param {string} name - Identifier to quote.
 * @returns {string} The identifier wrapped in double quotes.
 */
function quoteIdentifier(name) {
  const checked = validateIdentifier(name);
  const escaped = checked.replace(/"/g, '""');
  return `"${escaped}"`;
}
/**
 * Check that a value is a usable dense vector.
 *
 * Checks run in this order: array-ness, non-emptiness, every element being a
 * finite number, and (only when `expectedDim` is given) the length.
 *
 * @param {number[]} vector - Value to check.
 * @param {number} [expectedDim] - Required length; skipped when undefined.
 * @returns {void}
 * @throws {Error} On the first check that fails.
 */
function validateVector(vector, expectedDim) {
  if (!Array.isArray(vector)) {
    throw new Error('Vector must be an array');
  }
  const actualDim = vector.length;
  if (actualDim === 0) {
    throw new Error('Vector cannot be empty');
  }
  const allFinite = vector.every((component) => typeof component === 'number' && Number.isFinite(component));
  if (!allFinite) {
    throw new Error('Vector must contain only finite numbers');
  }
  if (expectedDim === undefined) {
    return;
  }
  if (actualDim !== expectedDim) {
    throw new Error(`Vector dimension mismatch: expected ${expectedDim}, got ${actualDim}`);
  }
}
/**
 * Wait for the given number of milliseconds (used between retry attempts).
 *
 * @param {number} ms - Delay handed to setTimeout.
 * @returns {Promise<void>} Promise that resolves once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
/**
 * PostgreSQL SQLSTATE codes that are treated as transient and worth retrying.
 * Built once at module load rather than on every call.
 */
const RETRYABLE_PG_CODES = new Set([
  '08000', // connection_exception
  '08003', // connection_does_not_exist
  '08006', // connection_failure
  '40001', // serialization_failure
  '40P01', // deadlock_detected
  '57P01', // admin_shutdown
  '57P02', // crash_shutdown
  '57P03', // cannot_connect_now
]);
/**
 * Check if an error is retryable.
 *
 * @param {{ code?: string } | null | undefined} err - Value that was thrown.
 * @returns {boolean} True only when `err.code` is one of the retryable
 *   SQLSTATE codes; false for errors without a code and for nullish values.
 */
function isRetryableError(err) {
  // Optional chaining: a thrown null/undefined must be reported as
  // "not retryable" instead of raising a TypeError that hides the real failure.
  return RETRYABLE_PG_CODES.has(err?.code);
}
/**
 * Client for a PostgreSQL database that has the RuVector extension installed.
 *
 * Wraps a `pg` connection pool and exposes the extension's SQL functions as
 * async methods. Every statement issued through `query`/`execute` is retried
 * on transient errors according to the retry configuration; statements run
 * inside `transaction()` are not retried.
 */
export class RuVectorClient {
  /** Active pg Pool, or null while disconnected. */
  pool = null;
  connectionString;
  poolConfig;
  retryConfig;

  /**
   * @param {string} connectionString - PostgreSQL connection string.
   * @param {object} [poolConfig] - Overrides for the default pool settings.
   * @param {object} [retryConfig] - Overrides for the default retry policy.
   */
  constructor(connectionString, poolConfig, retryConfig) {
    this.connectionString = connectionString;
    this.poolConfig = { ...DEFAULT_POOL_CONFIG, ...poolConfig };
    this.retryConfig = { ...DEFAULT_RETRY_CONFIG, ...retryConfig };
  }

  /**
   * Create the connection pool and verify that a connection can be opened.
   *
   * Calling this while already connected is a no-op, so an existing pool is
   * never orphaned. If the verification fails the new pool is closed again
   * and the error is rethrown; the client stays disconnected.
   *
   * @returns {Promise<void>}
   */
  async connect() {
    if (this.pool) {
      return;
    }
    const pool = new Pool({
      connectionString: this.connectionString,
      max: this.poolConfig.maxConnections,
      idleTimeoutMillis: this.poolConfig.idleTimeoutMs,
      connectionTimeoutMillis: this.poolConfig.connectionTimeoutMs,
      // Configure the timeout on the pool so that EVERY pooled connection
      // gets it. Issuing `SET statement_timeout` on one checked-out client
      // only affects that single session.
      statement_timeout: this.poolConfig.statementTimeoutMs,
    });
    try {
      const client = await pool.connect();
      try {
        await client.query('SELECT 1');
      } finally {
        client.release();
      }
    } catch (err) {
      // Best-effort cleanup; the connection failure is the error that matters.
      await pool.end().catch(() => {});
      throw err;
    }
    this.pool = pool;
  }

  /**
   * Close the pool, if any. Safe to call when not connected.
   *
   * @returns {Promise<void>}
   */
  async disconnect() {
    const pool = this.pool;
    if (!pool) {
      return;
    }
    // Clear the reference first so a failing end() cannot leave a dead pool behind.
    this.pool = null;
    await pool.end();
  }

  /**
   * Execute query with automatic retry on transient errors.
   *
   * Uses exponential backoff (base delay doubled per attempt, plus up to
   * 100 ms of jitter, capped at the configured maximum).
   *
   * @param {string} sql - SQL text.
   * @param {Array} [params] - Bind parameters.
   * @returns {Promise<object>} The raw pg result object.
   * @throws {Error} 'Not connected to database' when there is no pool;
   *   otherwise the last query error.
   */
  async queryWithRetry(sql, params) {
    const pool = this.pool;
    if (!pool) {
      throw new Error('Not connected to database');
    }
    const { maxRetries, baseDelayMs, maxDelayMs } = this.retryConfig;
    // A negative or non-numeric maxRetries means "no retries"; the query
    // still runs once instead of being skipped.
    const lastAttempt = maxRetries > 0 ? Math.floor(maxRetries) : 0;
    for (let attempt = 0; ; attempt++) {
      try {
        return await pool.query(sql, params);
      } catch (err) {
        if (attempt >= lastAttempt || !isRetryableError(err)) {
          throw err;
        }
        // Exponential backoff with jitter
        const delay = Math.min(baseDelayMs * 2 ** attempt + Math.random() * 100, maxDelayMs);
        await sleep(delay);
      }
    }
  }

  /**
   * Run a statement (with retry) and return its rows.
   *
   * @param {string} sql
   * @param {Array} [params]
   * @returns {Promise<object[]>}
   */
  async query(sql, params) {
    const { rows } = await this.queryWithRetry(sql, params);
    return rows;
  }

  /**
   * Run a statement (with retry) and discard its result.
   *
   * @param {string} sql
   * @param {Array} [params]
   * @returns {Promise<void>}
   */
  async execute(sql, params) {
    await this.queryWithRetry(sql, params);
  }

  /**
   * Execute multiple statements in a transaction.
   *
   * `fn` receives a dedicated pooled client; BEGIN is issued before it runs,
   * COMMIT after it resolves, ROLLBACK if anything throws.
   *
   * @param {(client: object) => Promise<*>} fn - Work to run in the transaction.
   * @returns {Promise<*>} Whatever `fn` resolves to.
   * @throws The error raised by `fn`, BEGIN or COMMIT. A failing ROLLBACK
   *   never replaces that original error.
   */
  async transaction(fn) {
    if (!this.pool) {
      throw new Error('Not connected to database');
    }
    const client = await this.pool.connect();
    let releaseError;
    try {
      await client.query('BEGIN');
      const result = await fn(client);
      await client.query('COMMIT');
      return result;
    } catch (err) {
      try {
        await client.query('ROLLBACK');
      } catch (rollbackErr) {
        // The connection is in an unknown state. Passing an error to
        // release() makes the pool discard it instead of reusing it.
        releaseError = rollbackErr;
      }
      throw err;
    } finally {
      client.release(releaseError);
    }
  }

  // ============================================================================
  // Extension Info
  // ============================================================================

  /**
   * Report the installed extension version, the features that appear to be
   * available, and SIMD information when the extension provides it.
   *
   * Feature probes are best-effort: a probe that fails is treated as
   * "feature not available".
   *
   * @returns {Promise<{version: string, features: string[], simd_info: *}>}
   */
  async getExtensionInfo() {
    const versionRows = await this.query("SELECT extversion as version FROM pg_extension WHERE extname = 'ruvector'");
    const version = versionRows[0]?.version || 'unknown';
    // Get SIMD info
    let simd_info;
    try {
      const simdRows = await this.query('SELECT ruvector_simd_info()');
      simd_info = simdRows[0]?.ruvector_simd_info;
    } catch {
      // Function may not exist
    }
    const featureChecks = [
      { name: 'Vector Operations', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_l2_distance'" },
      { name: 'HNSW Index', check: "SELECT 1 FROM pg_am WHERE amname = 'hnsw'" },
      { name: 'IVFFlat Index', check: "SELECT 1 FROM pg_am WHERE amname = 'ivfflat'" },
      { name: 'Attention Mechanisms', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_attention_score'" },
      { name: 'GNN Layers', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_gcn_forward'" },
      { name: 'Graph/Cypher', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_cypher'" },
      { name: 'Self-Learning', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_enable_learning'" },
      { name: 'Hyperbolic Embeddings', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_poincare_distance'" },
      { name: 'Sparse Vectors', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_sparse_bm25'" },
      { name: 'Agent Routing', check: "SELECT 1 FROM pg_proc WHERE proname = 'ruvector_route'" },
      { name: 'Quantization', check: "SELECT 1 FROM pg_proc WHERE proname = 'binary_quantize_arr'" },
    ];
    const features = [];
    for (const { name, check } of featureChecks) {
      try {
        const rows = await this.query(check);
        if (rows.length > 0) {
          features.push(name);
        }
      } catch {
        // Feature not available
      }
    }
    return { version, features, simd_info };
  }

  /**
   * Install the extension, or update an installed one.
   *
   * @param {boolean} [upgrade=false] - When true runs ALTER EXTENSION ... UPDATE
   *   instead of CREATE EXTENSION IF NOT EXISTS.
   * @returns {Promise<void>}
   */
  async installExtension(upgrade = false) {
    const statement = upgrade
      ? 'ALTER EXTENSION ruvector UPDATE'
      : 'CREATE EXTENSION IF NOT EXISTS ruvector CASCADE';
    await this.execute(statement);
  }

  /**
   * Fetch memory statistics from ruvector_memory_stats().
   *
   * @returns {Promise<object>} The reported stats, or an all-zero record when
   *   the function returns nothing.
   */
  async getMemoryStats() {
    const rows = await this.query('SELECT ruvector_memory_stats()');
    return rows[0]?.ruvector_memory_stats || {
      index_memory_mb: 0,
      vector_cache_mb: 0,
      quantization_tables_mb: 0,
      total_extension_mb: 0,
    };
  }

  // ============================================================================
  // Vector Operations
  // ============================================================================

  /**
   * Create a table for storing vectors, plus a btree index on its id column.
   *
   * @param {string} name - Table name; must be a valid identifier.
   * @param {number} dimensions - Integer in [1, 65535], stored as the default
   *   of the table's `dimensions` column.
   * @param {string} [indexType='hnsw'] - Accepted for API compatibility;
   *   currently not used (only the btree id index is created).
   * @returns {Promise<void>}
   * @throws {Error} On an invalid identifier or dimension count.
   */
  async createVectorTable(name, dimensions, indexType = 'hnsw') {
    const safeName = quoteIdentifier(name);
    const safeIdxName = quoteIdentifier(`${name}_id_idx`);
    const dims = Number(dimensions);
    // Number.isInteger also rejects NaN, which slips through plain range comparisons.
    if (!Number.isInteger(dims) || dims < 1 || dims > 65535) {
      throw new Error('Dimensions must be an integer between 1 and 65535');
    }
    // Use ruvector type (native RuVector extension type)
    // ruvector is a variable-length type, dimensions stored in metadata
    //
    // PostgreSQL does not accept bind parameters in DDL, so the validated
    // integer is written into the statement text. That is safe because
    // `dims` is known to be an integer in range at this point.
    await this.execute(`
      CREATE TABLE IF NOT EXISTS ${safeName} (
        id SERIAL PRIMARY KEY,
        embedding ruvector,
        dimensions INT DEFAULT ${dims},
        metadata JSONB,
        created_at TIMESTAMPTZ DEFAULT NOW()
      )
    `);
    // Note: HNSW/IVFFlat indexes require additional index implementation
    // For now, create a simple btree index on id for fast lookups
    await this.execute(`
      CREATE INDEX IF NOT EXISTS ${safeIdxName} ON ${safeName} (id)
    `);
  }

  /**
   * Insert one vector.
   *
   * @param {string} table - Target table; must be a valid identifier.
   * @param {number[]} vector - Non-empty array of finite numbers.
   * @param {object} [metadata] - Stored as JSON; NULL when omitted.
   * @returns {Promise<number>} The id of the new row.
   */
  async insertVector(table, vector, metadata) {
    validateVector(vector);
    const safeName = quoteIdentifier(table);
    const rows = await this.query(
      `INSERT INTO ${safeName} (embedding, metadata) VALUES ($1::ruvector, $2) RETURNING id`,
      [`[${vector.join(',')}]`, metadata ? JSON.stringify(metadata) : null],
    );
    return rows[0].id;
  }

  /**
   * Batch insert vectors using multi-row INSERT statements.
   *
   * All vectors are validated before the first statement is sent, so invalid
   * input never leaves earlier batches inserted. Batches are separate
   * statements and are not wrapped in a transaction.
   *
   * @param {string} table - Target table; must be a valid identifier.
   * @param {Array<{vector: number[], metadata?: object}>} vectors - Rows to insert.
   * @param {number} [batchSize=100] - Rows per INSERT statement; positive integer.
   * @returns {Promise<number[]>} Ids of the inserted rows, in statement order.
   * @throws {Error} On an invalid identifier, vector or batch size.
   */
  async insertVectorsBatch(table, vectors, batchSize = 100) {
    const safeName = quoteIdentifier(table);
    const requestedSize = Number(batchSize);
    if (!Number.isInteger(requestedSize) || requestedSize < 1) {
      throw new Error('batchSize must be a positive integer');
    }
    for (const item of vectors) {
      validateVector(item.vector);
    }
    // A single statement can carry at most 65535 bind parameters and each
    // row uses two of them.
    const maxRowsPerStatement = Math.floor(65535 / 2);
    const rowsPerStatement = Math.min(requestedSize, maxRowsPerStatement);
    const ids = [];
    for (let offset = 0; offset < vectors.length; offset += rowsPerStatement) {
      const batch = vectors.slice(offset, offset + rowsPerStatement);
      // Build multi-row INSERT
      const values = [];
      const placeholders = [];
      batch.forEach((item, idx) => {
        const base = idx * 2;
        placeholders.push(`($${base + 1}::ruvector, $${base + 2})`);
        values.push(`[${item.vector.join(',')}]`);
        values.push(item.metadata ? JSON.stringify(item.metadata) : null);
      });
      const rows = await this.query(
        `INSERT INTO ${safeName} (embedding, metadata) VALUES ${placeholders.join(', ')} RETURNING id`,
        values,
      );
      for (const row of rows) {
        ids.push(row.id);
      }
    }
    return ids;
  }

  /**
   * Nearest-neighbour search ordered by distance to `query`.
   *
   * @param {string} table - Table to search; must be a valid identifier.
   * @param {number[]} query - Query vector.
   * @param {number} [topK=10] - Maximum number of rows (bound to LIMIT).
   * @param {string} [metric='cosine'] - 'cosine' uses `<=>`, 'l2' uses `<->`;
   *   any other value falls through to `<#>`.
   * @returns {Promise<Array<{id: number, distance: number, metadata: *}>>}
   */
  async searchVectors(table, query, topK = 10, metric = 'cosine') {
    validateVector(query);
    const safeName = quoteIdentifier(table);
    let distanceOp;
    switch (metric) {
      case 'cosine':
        distanceOp = '<=>';
        break;
      case 'l2':
        distanceOp = '<->';
        break;
      default:
        distanceOp = '<#>';
    }
    return this.query(
      `SELECT id, embedding ${distanceOp} $1::ruvector as distance, metadata
       FROM ${safeName}
       ORDER BY embedding ${distanceOp} $1::ruvector
       LIMIT $2`,
      [`[${query.join(',')}]`, topK],
    );
  }

  // ============================================================================
  // Direct Distance Functions (use available SQL functions)
  // ============================================================================

  /**
   * Compute cosine distance using the array-based SQL function.
   *
   * @param {number[]} a
   * @param {number[]} b - Must have the same length as `a`.
   * @returns {Promise<number>}
   */
  async cosineDistanceArr(a, b) {
    validateVector(a);
    validateVector(b, a.length);
    const rows = await this.query('SELECT cosine_distance_arr($1::real[], $2::real[])', [a, b]);
    return rows[0].cosine_distance_arr;
  }

  /**
   * Compute L2 distance using the array-based SQL function.
   *
   * @param {number[]} a
   * @param {number[]} b - Must have the same length as `a`.
   * @returns {Promise<number>}
   */
  async l2DistanceArr(a, b) {
    validateVector(a);
    validateVector(b, a.length);
    const rows = await this.query('SELECT l2_distance_arr($1::real[], $2::real[])', [a, b]);
    return rows[0].l2_distance_arr;
  }

  /**
   * Compute inner product using the array-based SQL function.
   *
   * @param {number[]} a
   * @param {number[]} b - Must have the same length as `a`.
   * @returns {Promise<number>}
   */
  async innerProductArr(a, b) {
    validateVector(a);
    validateVector(b, a.length);
    const rows = await this.query('SELECT inner_product_arr($1::real[], $2::real[])', [a, b]);
    return rows[0].inner_product_arr;
  }

  /**
   * Normalize a vector using the array-based SQL function.
   *
   * @param {number[]} v
   * @returns {Promise<number[]>}
   */
  async vectorNormalize(v) {
    validateVector(v);
    const rows = await this.query('SELECT vector_normalize($1::real[])', [v]);
    return rows[0].vector_normalize;
  }

  // ============================================================================
  // Sparse Vector Operations
  // ============================================================================

  /**
   * Build a sparse vector via ruvector_to_sparse().
   *
   * @param {number[]} indices
   * @param {number[]} values
   * @param {number} dim
   * @returns {Promise<*>} Whatever ruvector_to_sparse() returns.
   */
  async createSparseVector(indices, values, dim) {
    const rows = await this.query('SELECT ruvector_to_sparse($1::int[], $2::real[], $3)', [indices, values, dim]);
    return rows[0].ruvector_to_sparse;
  }

  /**
   * Distance between two sparse vectors.
   *
   * @param {string} a - Sparse vector (cast to sparsevec).
   * @param {string} b - Sparse vector (cast to sparsevec).
   * @param {'dot'|'cosine'|'euclidean'|'manhattan'} metric
   * @returns {Promise<number>}
   * @throws {Error} When `metric` is not one of the supported names.
   */
  async sparseDistance(a, b, metric) {
    // A Map has no inherited keys, so names such as 'toString' cannot resolve
    // to something that would then be interpolated into the SQL text.
    const functionByMetric = new Map([
      ['dot', 'ruvector_sparse_dot'],
      ['cosine', 'ruvector_sparse_cosine'],
      ['euclidean', 'ruvector_sparse_euclidean'],
      ['manhattan', 'ruvector_sparse_manhattan'],
    ]);
    const sqlFunction = functionByMetric.get(metric);
    if (sqlFunction === undefined) {
      throw new Error(`Unsupported sparse distance metric: ${String(metric)}`);
    }
    const rows = await this.query(`SELECT ${sqlFunction}($1::sparsevec, $2::sparsevec) as distance`, [a, b]);
    return rows[0].distance;
  }

  /**
   * BM25 score via ruvector_sparse_bm25().
   *
   * @param {string} query - Sparse query vector.
   * @param {string} doc - Sparse document vector.
   * @param {number} docLen
   * @param {number} avgDocLen
   * @param {number} [k1=1.2]
   * @param {number} [b=0.75]
   * @returns {Promise<number>}
   */
  async sparseBM25(query, doc, docLen, avgDocLen, k1 = 1.2, b = 0.75) {
    const rows = await this.query(
      'SELECT ruvector_sparse_bm25($1::sparsevec, $2::sparsevec, $3, $4, $5, $6) as score',
      [query, doc, docLen, avgDocLen, k1, b],
    );
    return rows[0].score;
  }

  /**
   * Apply ruvector_sparse_top_k() and report non-zero counts before and after.
   *
   * @param {string} sparse - Sparse vector.
   * @param {number} k
   * @returns {Promise<{vector: string, nnz: number, originalNnz: number, newNnz: number}>}
   *   `nnz` and `newNnz` both hold the count for the returned vector.
   */
  async sparseTopK(sparse, k) {
    const before = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [sparse]);
    const reduced = await this.query('SELECT ruvector_sparse_top_k($1::sparsevec, $2)::text as result', [sparse, k]);
    const vector = reduced[0].result;
    const after = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [vector]);
    return {
      vector,
      nnz: after[0].nnz,
      originalNnz: before[0].nnz,
      newNnz: after[0].nnz,
    };
  }

  /**
   * Apply ruvector_sparse_prune() and report non-zero counts before and after.
   *
   * @param {string} sparse - Sparse vector.
   * @param {number} threshold
   * @returns {Promise<{vector: string, nnz: number, originalNnz: number, newNnz: number}>}
   *   `nnz` and `newNnz` both hold the count for the returned vector.
   */
  async sparsePrune(sparse, threshold) {
    const before = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [sparse]);
    const reduced = await this.query('SELECT ruvector_sparse_prune($1::sparsevec, $2)::text as result', [sparse, threshold]);
    const vector = reduced[0].result;
    const after = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [vector]);
    return {
      vector,
      nnz: after[0].nnz,
      originalNnz: before[0].nnz,
      newNnz: after[0].nnz,
    };
  }

  /**
   * Convert a dense array to a sparse vector.
   *
   * @param {number[]} dense
   * @returns {Promise<{vector: string, nnz: number}>}
   */
  async denseToSparse(dense) {
    const converted = await this.query('SELECT ruvector_dense_to_sparse($1::real[])::text as result', [dense]);
    const vector = converted[0].result;
    const counted = await this.query('SELECT ruvector_sparse_nnz($1::sparsevec) as nnz', [vector]);
    return {
      vector,
      nnz: counted[0].nnz,
    };
  }

  /**
   * Convert a sparse vector to a dense array.
   *
   * @param {string} sparse
   * @returns {Promise<number[]>}
   */
  async sparseToDense(sparse) {
    const rows = await this.query('SELECT ruvector_sparse_to_dense($1::sparsevec) as result', [sparse]);
    return rows[0].result;
  }

  /**
   * Dimension, non-zero count, norm and sparsity percentage of a sparse vector.
   *
   * @param {string} sparse
   * @returns {Promise<{dim: number, nnz: number, norm: number, sparsity: number}>}
   *   `sparsity` is `(1 - nnz / dim) * 100`, or 0 when `dim` is not positive.
   */
  async sparseInfo(sparse) {
    const rows = await this.query(
      `SELECT
         ruvector_sparse_dim($1::sparsevec) as dim,
         ruvector_sparse_nnz($1::sparsevec) as nnz,
         ruvector_sparse_norm($1::sparsevec) as norm`,
      [sparse],
    );
    const { dim, nnz, norm } = rows[0];
    return {
      dim,
      nnz,
      norm,
      // Guard the division so a zero-dimensional vector does not yield NaN.
      sparsity: dim > 0 ? (1 - nnz / dim) * 100 : 0,
    };
  }

  // ============================================================================
  // Hyperbolic Operations
  // ============================================================================

  /**
   * @param {number[]} a
   * @param {number[]} b
   * @param {number} [curvature=-1.0]
   * @returns {Promise<number>} Result of ruvector_poincare_distance().
   */
  async poincareDistance(a, b, curvature = -1.0) {
    const rows = await this.query('SELECT ruvector_poincare_distance($1::real[], $2::real[], $3) as distance', [a, b, curvature]);
    return rows[0].distance;
  }

  /**
   * @param {number[]} a
   * @param {number[]} b
   * @param {number} [curvature=-1.0]
   * @returns {Promise<number>} Result of ruvector_lorentz_distance().
   */
  async lorentzDistance(a, b, curvature = -1.0) {
    const rows = await this.query('SELECT ruvector_lorentz_distance($1::real[], $2::real[], $3) as distance', [a, b, curvature]);
    return rows[0].distance;
  }

  /**
   * @param {number[]} a
   * @param {number[]} b
   * @param {number} [curvature=-1.0]
   * @returns {Promise<number[]>} Result of ruvector_mobius_add().
   */
  async mobiusAdd(a, b, curvature = -1.0) {
    const rows = await this.query('SELECT ruvector_mobius_add($1::real[], $2::real[], $3) as result', [a, b, curvature]);
    return rows[0].result;
  }

  /**
   * @param {number[]} base
   * @param {number[]} tangent
   * @param {number} [curvature=-1.0]
   * @returns {Promise<number[]>} Result of ruvector_exp_map().
   */
  async expMap(base, tangent, curvature = -1.0) {
    const rows = await this.query('SELECT ruvector_exp_map($1::real[], $2::real[], $3) as result', [base, tangent, curvature]);
    return rows[0].result;
  }

  /**
   * @param {number[]} base
   * @param {number[]} target
   * @param {number} [curvature=-1.0]
   * @returns {Promise<number[]>} Result of ruvector_log_map().
   */
  async logMap(base, target, curvature = -1.0) {
    const rows = await this.query('SELECT ruvector_log_map($1::real[], $2::real[], $3) as result', [base, target, curvature]);
    return rows[0].result;
  }

  /**
   * @param {number[]} poincare
   * @param {number} [curvature=-1.0]
   * @returns {Promise<number[]>} Result of ruvector_poincare_to_lorentz().
   */
  async poincareToLorentz(poincare, curvature = -1.0) {
    const rows = await this.query('SELECT ruvector_poincare_to_lorentz($1::real[], $2) as result', [poincare, curvature]);
    return rows[0].result;
  }

  /**
   * @param {number[]} lorentz
   * @param {number} [curvature=-1.0]
   * @returns {Promise<number[]>} Result of ruvector_lorentz_to_poincare().
   */
  async lorentzToPoincare(lorentz, curvature = -1.0) {
    const rows = await this.query('SELECT ruvector_lorentz_to_poincare($1::real[], $2) as result', [lorentz, curvature]);
    return rows[0].result;
  }

  /**
   * @param {number[]} a
   * @param {number[]} b
   * @returns {Promise<number>} Result of ruvector_minkowski_dot().
   */
  async minkowskiDot(a, b) {
    const rows = await this.query('SELECT ruvector_minkowski_dot($1::real[], $2::real[]) as result', [a, b]);
    return rows[0].result;
  }

  // ============================================================================
  // Quantization Operations
  // ============================================================================

  /**
   * @param {number[]} vector
   * @returns {Promise<*>} Result of binary_quantize_arr().
   */
  async binaryQuantize(vector) {
    const rows = await this.query('SELECT binary_quantize_arr($1::real[]) as result', [vector]);
    return rows[0].result;
  }

  /**
   * @param {number[]} vector
   * @returns {Promise<*>} Result of scalar_quantize_arr().
   */
  async scalarQuantize(vector) {
    const rows = await this.query('SELECT scalar_quantize_arr($1::real[]) as result', [vector]);
    return rows[0].result;
  }

  /**
   * Alias for getMemoryStats().
   *
   * @returns {Promise<object>}
   */
  async quantizationStats() {
    return this.getMemoryStats();
  }

  // ============================================================================
  // Attention Operations
  // ============================================================================

  /**
   * Run one of the extension's attention functions.
   *
   * @param {number[]} query
   * @param {number[][]} keys
   * @param {number[][]} values - Not used for the default (scaled-dot) path.
   * @param {string} [type='scaled_dot'] - 'multi_head', 'flash', or anything
   *   else for scaled-dot attention scores.
   * @returns {Promise<{output: *}>}
   */
  async computeAttention(query, keys, values, type = 'scaled_dot') {
    // Fourth SQL argument per function. NOTE(review): presumably the head
    // count (4) and the block size (64) -- confirm against the extension.
    const fixedArgument = { multi_head: 4, flash: 64 };
    const functionNames = { multi_head: 'ruvector_multi_head_attention', flash: 'ruvector_flash_attention' };
    if (type !== 'multi_head' && type !== 'flash') {
      // For scaled_dot, compute attention scores directly
      const scoreRows = await this.query(
        'SELECT ruvector_attention_scores($1::real[], $2::real[][], $3) as scores',
        [query, keys, 'scaled_dot'],
      );
      return { output: scoreRows[0].scores };
    }
    const rows = await this.query(
      `SELECT ${functionNames[type]}($1::real[], $2::real[][], $3::real[][], $4) as output`,
      [query, keys, values, fixedArgument[type]],
    );
    return { output: rows[0].output };
  }

  /**
   * @returns {Promise<string[]>} Names reported by ruvector_attention_types().
   */
  async listAttentionTypes() {
    const rows = await this.query('SELECT name FROM ruvector_attention_types()');
    return rows.map((row) => row.name);
  }

  // ============================================================================
  // GNN Operations
  // ============================================================================

  /**
   * Insert or update a row in ruvector_gnn_layers, keyed by name.
   *
   * @param {string} name
   * @param {string} type
   * @param {number} inputDim
   * @param {number} outputDim
   * @returns {Promise<void>}
   */
  async createGnnLayer(name, type, inputDim, outputDim) {
    // Store layer config (GNN layers are stateless, config is for reference)
    await this.execute(
      `INSERT INTO ruvector_gnn_layers (name, type, input_dim, output_dim)
       VALUES ($1, $2, $3, $4)
       ON CONFLICT (name) DO UPDATE SET type = $2, input_dim = $3, output_dim = $4`,
      [name, type, inputDim, outputDim],
    );
  }

  /**
   * Run a GNN forward pass.
   *
   * @param {string} layerType - 'sage' selects GraphSAGE; anything else GCN.
   * @param {number[][]} features
   * @param {number[]} src
   * @param {number[]} dst
   * @param {number} outDim
   * @returns {Promise<*>}
   */
  async gnnForward(layerType, features, src, dst, outDim) {
    // NOTE(review): the literal 10 passed to GraphSAGE is presumably a
    // neighbour sample count -- confirm against the extension's signature.
    const sql = layerType === 'sage'
      ? 'SELECT ruvector_graphsage_forward($1::real[][], $2::int[], $3::int[], $4, 10) as result'
      : 'SELECT ruvector_gcn_forward($1::real[][], $2::int[], $3::int[], NULL, $4) as result';
    const rows = await this.query(sql, [features, src, dst, outDim]);
    return rows[0].result;
  }

  // ============================================================================
  // Graph Operations
  // ============================================================================

  /**
   * @param {string} name
   * @returns {Promise<*>} Result of ruvector_create_graph().
   */
  async createGraph(name) {
    const rows = await this.query('SELECT ruvector_create_graph($1) as result', [name]);
    return rows[0].result;
  }

  /**
   * @param {string} graphName
   * @param {string} query - Cypher text.
   * @param {object} [params] - Sent as a JSON string; NULL when omitted.
   * @returns {Promise<object[]>} Raw result rows.
   */
  async cypherQuery(graphName, query, params) {
    return this.query('SELECT ruvector_cypher($1, $2, $3)', [graphName, query, params ? JSON.stringify(params) : null]);
  }

  /**
   * @param {string} graphName
   * @param {string[]} labels
   * @param {object} properties - Sent as jsonb.
   * @returns {Promise<*>} Result of ruvector_add_node().
   */
  async addNode(graphName, labels, properties) {
    const rows = await this.query('SELECT ruvector_add_node($1, $2, $3::jsonb) as result', [graphName, labels, JSON.stringify(properties)]);
    return rows[0].result;
  }

  /**
   * @param {string} graphName
   * @param {number} sourceId
   * @param {number} targetId
   * @param {string} edgeType
   * @param {object} properties - Sent as jsonb.
   * @returns {Promise<*>} Result of ruvector_add_edge().
   */
  async addEdge(graphName, sourceId, targetId, edgeType, properties) {
    const rows = await this.query(
      'SELECT ruvector_add_edge($1, $2, $3, $4, $5::jsonb) as result',
      [graphName, sourceId, targetId, edgeType, JSON.stringify(properties)],
    );
    return rows[0].result;
  }

  /**
   * @param {string} graphName
   * @param {number} startId
   * @param {number} endId
   * @param {number} maxHops
   * @returns {Promise<*>} Result of ruvector_shortest_path().
   */
  async shortestPath(graphName, startId, endId, maxHops) {
    const rows = await this.query('SELECT ruvector_shortest_path($1, $2, $3, $4) as result', [graphName, startId, endId, maxHops]);
    return rows[0].result;
  }

  /**
   * @param {string} graphName
   * @returns {Promise<*>} Result of ruvector_graph_stats().
   */
  async graphStats(graphName) {
    const rows = await this.query('SELECT ruvector_graph_stats($1) as result', [graphName]);
    return rows[0].result;
  }

  /**
   * @returns {Promise<string[]>} Graph names from ruvector_list_graphs().
   */
  async listGraphs() {
    const rows = await this.query('SELECT unnest(ruvector_list_graphs()) as graph');
    return rows.map((row) => row.graph);
  }

  /**
   * @param {string} graphName
   * @returns {Promise<*>} Result of ruvector_delete_graph().
   */
  async deleteGraph(graphName) {
    const rows = await this.query('SELECT ruvector_delete_graph($1) as result', [graphName]);
    return rows[0].result;
  }

  // ============================================================================
  // Routing/Agent Operations
  // ============================================================================

  /**
   * @param {string} name
   * @param {string} agentType
   * @param {string[]} capabilities
   * @param {number} costPerRequest
   * @param {number} avgLatencyMs
   * @param {number} qualityScore
   * @returns {Promise<*>} Result of ruvector_register_agent().
   */
  async registerAgent(name, agentType, capabilities, costPerRequest, avgLatencyMs, qualityScore) {
    const rows = await this.query(
      'SELECT ruvector_register_agent($1, $2, $3, $4, $5, $6) as result',
      [name, agentType, capabilities, costPerRequest, avgLatencyMs, qualityScore],
    );
    return rows[0].result;
  }

  /**
   * @param {object} config - Sent as jsonb.
   * @returns {Promise<*>} Result of ruvector_register_agent_full().
   */
  async registerAgentFull(config) {
    const rows = await this.query('SELECT ruvector_register_agent_full($1::jsonb) as result', [JSON.stringify(config)]);
    return rows[0].result;
  }

  /**
   * @param {string} name
   * @param {number} latencyMs
   * @param {boolean} success
   * @param {number} [quality] - NULL when omitted.
   * @returns {Promise<*>} Result of ruvector_update_agent_metrics().
   */
  async updateAgentMetrics(name, latencyMs, success, quality) {
    const rows = await this.query(
      'SELECT ruvector_update_agent_metrics($1, $2, $3, $4) as result',
      [name, latencyMs, success, quality ?? null],
    );
    return rows[0].result;
  }

  /**
   * @param {string} name
   * @returns {Promise<*>} Result of ruvector_remove_agent().
   */
  async removeAgent(name) {
    const rows = await this.query('SELECT ruvector_remove_agent($1) as result', [name]);
    return rows[0].result;
  }

  /**
   * @param {string} name
   * @param {boolean} isActive
   * @returns {Promise<*>} Result of ruvector_set_agent_active().
   */
  async setAgentActive(name, isActive) {
    const rows = await this.query('SELECT ruvector_set_agent_active($1, $2) as result', [name, isActive]);
    return rows[0].result;
  }

  /**
   * @param {number[]} embedding
   * @param {string} [optimizeFor='balanced']
   * @param {object} [constraints] - Sent as jsonb; NULL when omitted.
   * @returns {Promise<*>} Result of ruvector_route().
   */
  async route(embedding, optimizeFor = 'balanced', constraints) {
    const rows = await this.query(
      'SELECT ruvector_route($1::real[], $2, $3::jsonb) as result',
      [embedding, optimizeFor, constraints ? JSON.stringify(constraints) : null],
    );
    return rows[0].result;
  }

  /**
   * @returns {Promise<object[]>} Rows from ruvector_list_agents().
   */
  async listAgents() {
    return this.query('SELECT * FROM ruvector_list_agents()');
  }

  /**
   * @param {string} name
   * @returns {Promise<*>} Result of ruvector_get_agent().
   */
  async getAgent(name) {
    const rows = await this.query('SELECT ruvector_get_agent($1) as result', [name]);
    return rows[0].result;
  }

  /**
   * @param {string} capability
   * @param {number} [limit=10]
   * @returns {Promise<object[]>} Rows from ruvector_find_agents_by_capability().
   */
  async findAgentsByCapability(capability, limit = 10) {
    return this.query('SELECT * FROM ruvector_find_agents_by_capability($1, $2)', [capability, limit]);
  }

  /**
   * @returns {Promise<*>} Result of ruvector_routing_stats().
   */
  async routingStats() {
    const rows = await this.query('SELECT ruvector_routing_stats() as result');
    return rows[0].result;
  }

  /**
   * @returns {Promise<*>} Result of ruvector_clear_agents().
   */
  async clearAgents() {
    const rows = await this.query('SELECT ruvector_clear_agents() as result');
    return rows[0].result;
  }

  // ============================================================================
  // Learning Operations
  // ============================================================================

  /**
   * @param {string} tableName
   * @param {object} [config] - Sent as jsonb; NULL when omitted.
   * @returns {Promise<*>} Result of ruvector_enable_learning().
   */
  async enableLearning(tableName, config) {
    const rows = await this.query(
      'SELECT ruvector_enable_learning($1, $2::jsonb) as result',
      [tableName, config ? JSON.stringify(config) : null],
    );
    return rows[0].result;
  }

  /**
   * @param {string} tableName
   * @param {number[]} queryVector
   * @param {number[]} relevantIds
   * @param {number[]} irrelevantIds
   * @returns {Promise<*>} Result of ruvector_record_feedback().
   */
  async recordFeedback(tableName, queryVector, relevantIds, irrelevantIds) {
    const rows = await this.query(
      'SELECT ruvector_record_feedback($1, $2::real[], $3::bigint[], $4::bigint[]) as result',
      [tableName, queryVector, relevantIds, irrelevantIds],
    );
    return rows[0].result;
  }

  /**
   * @param {string} tableName
   * @returns {Promise<*>} Result of ruvector_learning_stats().
   */
  async learningStats(tableName) {
    const rows = await this.query('SELECT ruvector_learning_stats($1) as result', [tableName]);
    return rows[0].result;
  }

  /**
   * @param {string} tableName
   * @param {string} [optimizeFor='balanced']
   * @param {number[][]} [sampleQueries] - NULL when omitted.
   * @returns {Promise<*>} Result of ruvector_auto_tune().
   */
  async autoTune(tableName, optimizeFor = 'balanced', sampleQueries) {
    const rows = await this.query(
      'SELECT ruvector_auto_tune($1, $2, $3::real[][]) as result',
      [tableName, optimizeFor, sampleQueries ?? null],
    );
    return rows[0].result;
  }

  /**
   * @param {string} tableName
   * @param {number} [numClusters=10]
   * @returns {Promise<*>} Result of ruvector_extract_patterns().
   */
  async extractPatterns(tableName, numClusters = 10) {
    const rows = await this.query('SELECT ruvector_extract_patterns($1, $2) as result', [tableName, numClusters]);
    return rows[0].result;
  }

  /**
   * @param {string} tableName
   * @param {number[]} queryVector
   * @returns {Promise<*>} Result of ruvector_get_search_params().
   */
  async getSearchParams(tableName, queryVector) {
    const rows = await this.query('SELECT ruvector_get_search_params($1, $2::real[]) as result', [tableName, queryVector]);
    return rows[0].result;
  }

  /**
   * @param {string} tableName
   * @returns {Promise<*>} Result of ruvector_clear_learning().
   */
  async clearLearning(tableName) {
    const rows = await this.query('SELECT ruvector_clear_learning($1) as result', [tableName]);
    return rows[0].result;
  }

  // Legacy methods for backward compatibility

  /**
   * Legacy placeholder: performs no training and ignores both arguments.
   *
   * @param {*} data - Unused.
   * @param {number} [epochs=10] - Unused.
   * @returns {Promise<{loss: number, accuracy: number}>} Always the fixed
   *   values `{ loss: 0.1, accuracy: 0.9 }`; they are not measurements.
   */
  async trainFromTrajectories(data, epochs = 10) {
    return { loss: 0.1, accuracy: 0.9 };
  }

  /**
   * Legacy placeholder: returns its input unchanged.
   *
   * @param {*} input
   * @returns {Promise<*>} The same `input`.
   */
  async predict(input) {
    return input; // Placeholder
  }

  // ============================================================================
  // Benchmark Operations
  // ============================================================================

  /**
   * Run a small client-timed benchmark.
   *
   * For type 'vector' or 'all', generates up to 100 random vectors and times
   * cosine-distance queries over the pairs formed by the first 10 of them.
   *
   * @param {string} type - Benchmark selector.
   * @param {number} size - Requested number of vectors (capped at 100).
   * @param {number} dimensions - Length of each generated vector.
   * @returns {Promise<object>} `{ type, size, dimensions, total_time_ms }`
   *   plus `vector_time_ms` when the vector benchmark ran.
   */
  async runBenchmark(type, size, dimensions) {
    // Benchmarks are run client-side with timing
    const start = Date.now();
    const results = { type, size, dimensions };
    if (type === 'vector' || type === 'all') {
      const vectorStart = Date.now();
      // Generate random vectors
      const sampleCount = Math.min(size, 100);
      const vectors = Array.from({ length: sampleCount }, () => Array.from({ length: dimensions }, () => Math.random()));
      // Compute pairwise distances
      const pairLimit = Math.min(vectors.length, 10);
      for (let i = 0; i < pairLimit; i++) {
        for (let j = i + 1; j < pairLimit; j++) {
          await this.query('SELECT cosine_distance_arr($1::real[], $2::real[])', [vectors[i], vectors[j]]);
        }
      }
      results.vector_time_ms = Date.now() - vectorStart;
    }
    results.total_time_ms = Date.now() - start;
    return results;
  }
}
export default RuVectorClient;