claude-flow
Version:
Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration
1,738 lines (1,497 loc) • 48.6 kB
text/typescript
/**
* RuVector PostgreSQL Bridge - Streaming and Transaction Support
*
* Provides streaming capabilities for large result sets and batch operations,
* enhanced transaction handling with savepoints and isolation levels,
* and efficient batch processing with backpressure handling.
*
* @module @claude-flow/plugins/integrations/ruvector/streaming
* @version 1.0.0
*/
import { EventEmitter } from 'events';
import type {
VectorSearchOptions,
VectorSearchResult,
VectorInsertOptions,
VectorUpdateOptions,
BatchResult,
DistanceMetric,
QueryResult,
} from './types.js';
// ============================================================================
// Type Definitions
// ============================================================================
/**
 * Minimal structural view of a checked-out PostgreSQL client
 * (mirrors the `PoolClient` exposed by the `pg` package).
 *
 * Only the two members this module actually calls are declared.
 */
export interface PoolClient {
/** Runs the SQL in `text` with optional positional bind `values` and resolves with the result. */
query<T = unknown>(text: string, values?: unknown[]): Promise<PgQueryResult<T>>;
/**
 * Hands the client back to its pool.
 * NOTE(review): in `pg`, passing an error is expected to discard the connection
 * instead of reusing it — confirm against the driver version in use.
 */
release(err?: Error): void;
}
/**
 * PostgreSQL query result interface.
 *
 * @typeParam T - Shape of a single returned row.
 */
interface PgQueryResult<T> {
/** Rows returned by the statement (empty for statements that return nothing). */
rows: T[];
/** Number of rows reported by the server, or `null` when the driver reports none. */
rowCount: number | null;
/** Command tag of the executed statement (copied into `QueryResult.command` by `RuVectorTransaction.query`). */
command: string;
/** Optional column descriptors; not read anywhere in the visible part of this file. */
fields?: Array<{ name: string; dataTypeID: number }>;
}
/**
 * Pool interface for connection management.
 *
 * Structural subset of a connection pool; the streaming classes only call
 * `connect()` in the visible part of this file.
 */
interface Pool {
/** Checks out a dedicated client; the caller is responsible for calling `release()` on it. */
connect(): Promise<PoolClient>;
/** Runs a single statement on a pool-managed connection. */
query<T = unknown>(text: string, values?: unknown[]): Promise<PgQueryResult<T>>;
/** Shuts the pool down. */
end(): Promise<void>;
/** Registers a listener for a pool event. */
on(event: string, callback: (...args: unknown[]) => void): this;
// NOTE(review): the three counters below presumably mirror the `pg` Pool
// statistics (all clients / idle clients / queued requests) — confirm.
totalCount: number;
idleCount: number;
waitingCount: number;
}
/**
 * Extended search options for streaming operations.
 *
 * Consumed by `RuVectorStream.streamSearch`, which applies the defaults
 * noted on each member when the member is omitted.
 */
export interface StreamSearchOptions extends VectorSearchOptions {
/** Number of results per batch (default: 1000, i.e. `DEFAULT_BATCH_SIZE`) */
batchSize?: number;
/** Cursor name for server-side cursor (default: a generated name prefixed with `DEFAULT_CURSOR_PREFIX`) */
cursorName?: string;
/** Query timeout in milliseconds (default: `DEFAULT_TIMEOUT_MS`) */
timeout?: number;
/** Whether to use a server-side cursor (default: true); when false, LIMIT/OFFSET pagination is used */
useServerCursor?: boolean;
/**
 * Fetch direction for cursor.
 * NOTE(review): not read by any code in the visible part of this file.
 */
fetchDirection?: 'forward' | 'backward';
}
/**
 * Insert result for streaming operations.
 *
 * One value is yielded by `RuVectorStream.streamInsert` per attempted row.
 */
export interface InsertResult {
/**
 * ID of the inserted vector. For a failed insert this is the caller-supplied
 * id, or the literal string 'unknown' when none was supplied.
 */
id: string | number;
/** Whether the insert was successful */
success: boolean;
/** Error message if insert failed */
error?: string;
/** Zero-based index of the batch the row was sent in */
batchIndex: number;
/** Zero-based index of the row within its batch */
itemIndex: number;
}
/**
 * Vector entry for streaming inserts.
 */
export interface VectorEntry {
/** Optional ID; when omitted the INSERT uses `gen_random_uuid()` for the id column */
id?: string | number;
/** Vector data; serialised as a `[v1,v2,...]` literal and cast with `::vector` */
vector: number[] | Float32Array;
/** Optional metadata; serialised with `JSON.stringify` and bound as `jsonb` (NULL when absent) */
metadata?: Record<string, unknown>;
}
/**
 * Transaction isolation levels.
 *
 * `RuVectorTransaction.begin` turns the value into the SQL keyword form by
 * replacing the underscore with a space and upper-casing it.
 */
export type IsolationLevel = 'read_committed' | 'repeatable_read' | 'serializable';
/**
 * Batch processing options.
 *
 * Defaults listed below are the ones applied by the `BatchProcessor`
 * constructor when a member is omitted.
 */
export interface BatchOptions {
/** Batch size for processing (default: `DEFAULT_BATCH_SIZE`, 1000) */
batchSize?: number;
/** Maximum concurrent batches (default: `DEFAULT_CONCURRENCY`, 4) */
concurrency?: number;
/** Retry failed operations (default: true) */
retryOnFailure?: boolean;
/** Maximum retry attempts (default: 3) */
maxRetries?: number;
/** Enable transaction mode (default: false) */
useTransaction?: boolean;
}
/**
 * Pool events interface.
 *
 * Maps event names to listener signatures.
 * NOTE(review): nothing in the visible part of this file emits or subscribes
 * to these names; the emitter is presumably defined elsewhere — confirm.
 */
export interface PoolEvents {
'pool:connect': (client: PoolClient) => void;
'pool:acquire': (client: PoolClient) => void;
'pool:release': (client: PoolClient) => void;
'pool:remove': (client: PoolClient) => void;
/** `client` is optional: an error may be reported without an associated client. */
'pool:error': (error: Error, client?: PoolClient) => void;
}
/**
 * Stream state for backpressure handling.
 *
 * Held privately by `RuVectorStream`.
 */
interface StreamState {
/** Set by `pause()`, cleared by `resume()`; checked before each fetch/insert step. */
paused: boolean;
// NOTE(review): `buffer`, `bufferSize` and `highWaterMark` are initialised in the
// RuVectorStream constructor but not read anywhere in the visible part of this file.
buffer: unknown[];
bufferSize: number;
highWaterMark: number;
/** Promise that paused generators await; created lazily on the first wait. */
drainPromise: Promise<void> | null;
/** Resolver for `drainPromise`; invoked and cleared by `resume()`. */
drainResolve: (() => void) | null;
}
// ============================================================================
// Constants
// ============================================================================
/** Rows per batch when a caller does not supply `batchSize`. */
const DEFAULT_BATCH_SIZE = 1000;
/** Default `concurrency` for `BatchProcessor`. */
const DEFAULT_CONCURRENCY = 4;
/** Default `highWaterMark` stored in the stream state (unit is not established by the visible code). */
const DEFAULT_HIGH_WATER_MARK = 16384;
/** Default value, in milliseconds, interpolated into the `statement_timeout` setting by `streamSearch`. */
const DEFAULT_TIMEOUT_MS = 30000;
/** Prefix of auto-generated server-side cursor names. */
const DEFAULT_CURSOR_PREFIX = 'ruvector_cursor_';
// Distance operators mapping: SQL infix operator placed between the vector
// column and the query vector for each supported metric.
// NOTE(review): several metrics share one operator (hamming/jaccard -> '<~>',
// manhattan/chebyshev -> '<+>', euclidean/minkowski/bray_curtis/canberra/
// mahalanobis -> '<->', cosine/correlation -> '<=>'), so those metrics produce
// identical SQL here. Confirm against the extension's operator list whether
// that is an intentional approximation.
const DISTANCE_OPERATORS: Record<DistanceMetric, string> = {
cosine: '<=>',
euclidean: '<->',
dot: '<#>',
hamming: '<~>',
manhattan: '<+>',
chebyshev: '<+>',
jaccard: '<~>',
minkowski: '<->',
bray_curtis: '<->',
canberra: '<->',
mahalanobis: '<->',
correlation: '<=>',
};
// ============================================================================
// RuVectorStream Class
// ============================================================================
/**
 * Streaming support for RuVector operations.
 *
 * Provides async generators for streaming large result sets (through a
 * server-side cursor or LIMIT/OFFSET pagination) and for inserting vectors in
 * batches. Generators honour {@link RuVectorStream.pause} /
 * {@link RuVectorStream.resume} for backpressure.
 *
 * Events emitted: `result`, `insert`, `complete`, `pause`, `resume`, `abort`.
 *
 * @example
 * ```typescript
 * const stream = new RuVectorStream(pool, config);
 *
 * // Stream search results
 * for await (const result of stream.streamSearch({ query: vector, k: 10000 })) {
 *   console.log(result);
 * }
 *
 * // Stream inserts
 * async function* vectorGenerator() {
 *   for (let i = 0; i < 100000; i++) {
 *     yield { vector: generateVector(), metadata: { index: i } };
 *   }
 * }
 *
 * for await (const result of stream.streamInsert(vectorGenerator())) {
 *   console.log(`Inserted: ${result.id}`);
 * }
 * ```
 */
export class RuVectorStream extends EventEmitter {
  private readonly pool: Pool;
  private readonly schema?: string;
  private readonly defaultTableName: string;
  private readonly state: StreamState;
  private activeClient: PoolClient | null = null;
  private activeCursors: Set<string> = new Set();

  /**
   * @param pool - Connection pool used to check out one client per stream
   * @param options - Optional schema, default table name (`'vectors'`) and
   *   high-water mark for the backpressure state
   */
  constructor(
    pool: Pool,
    options: {
      schema?: string;
      defaultTableName?: string;
      highWaterMark?: number;
    } = {}
  ) {
    super();
    this.pool = pool;
    this.schema = options.schema;
    this.defaultTableName = options.defaultTableName ?? 'vectors';
    this.state = {
      paused: false,
      buffer: [],
      bufferSize: 0,
      highWaterMark: options.highWaterMark ?? DEFAULT_HIGH_WATER_MARK,
      drainPromise: null,
      drainResolve: null,
    };
  }

  // ===========================================================================
  // Stream Search
  // ===========================================================================

  /**
   * Stream large result sets using a server-side cursor (default) or
   * LIMIT/OFFSET pagination.
   *
   * @param options - Search options with streaming configuration
   * @yields {VectorSearchResult} Individual search results
   * @throws {RangeError} If `batchSize`, `timeout` or `k` is not a valid integer
   * @throws {TypeError} If the query vector contains a non-finite element
   */
  async *streamSearch(options: StreamSearchOptions): AsyncGenerator<VectorSearchResult, void, undefined> {
    // Both values are interpolated into SQL text, so they must be real integers.
    const batchSize = this.requireInteger(options.batchSize ?? DEFAULT_BATCH_SIZE, 'batchSize', 1);
    const timeout = this.requireInteger(options.timeout ?? DEFAULT_TIMEOUT_MS, 'timeout', 0);
    const cursorName =
      options.cursorName ?? `${DEFAULT_CURSOR_PREFIX}${Date.now()}_${Math.random().toString(36).slice(2)}`;
    const useServerCursor = options.useServerCursor ?? true;

    const client = await this.pool.connect();
    this.activeClient = client;
    if (useServerCursor) {
      this.activeCursors.add(cursorName);
    }
    let timeoutApplied = false;

    try {
      // SET LOCAL only has an effect inside a transaction block, and no
      // transaction is open yet (the pagination path never opens one). Apply
      // the timeout at session level and reset it before the client goes back
      // to the pool.
      await client.query(`SET statement_timeout = ${timeout}`);
      timeoutApplied = true;

      if (useServerCursor) {
        // Use server-side cursor for memory efficiency
        yield* this.streamWithCursor(client, options, cursorName, batchSize);
      } else {
        // Use OFFSET/LIMIT pagination (less efficient but simpler)
        yield* this.streamWithPagination(client, options, batchSize);
      }
    } finally {
      if (this.activeCursors.has(cursorName)) {
        try {
          await client.query(`CLOSE ${this.escapeIdentifier(cursorName)}`);
        } catch {
          // Cursor may already be closed (e.g. dropped by a rollback)
        }
        this.activeCursors.delete(cursorName);
      }
      if (timeoutApplied) {
        try {
          await client.query('RESET statement_timeout');
        } catch {
          // Best effort: the connection may no longer be usable
        }
      }
      client.release();
      // Do not clobber the reference if another stream has taken over since.
      if (this.activeClient === client) {
        this.activeClient = null;
      }
    }
  }

  /**
   * Stream results using a server-side cursor.
   *
   * The cursor lives inside an explicit transaction. The transaction is
   * committed after the cursor is exhausted and rolled back in every other
   * case, including when the consumer stops iterating early.
   */
  private async *streamWithCursor(
    client: PoolClient,
    options: StreamSearchOptions,
    cursorName: string,
    batchSize: number
  ): AsyncGenerator<VectorSearchResult, void, undefined> {
    const { sql, params } = this.buildSearchQuery(options);
    const escapedCursor = this.escapeIdentifier(cursorName);

    // Begin transaction for cursor
    await client.query('BEGIN');
    let committed = false;

    try {
      await client.query(`DECLARE ${escapedCursor} CURSOR WITH HOLD FOR ${sql}`, params);

      let rank = 0;
      for (;;) {
        // Wait if paused (backpressure)
        await this.waitIfPaused();

        const fetchResult = await client.query<{
          id: string | number;
          distance: number;
          [key: string]: unknown;
        }>(`FETCH ${batchSize} FROM ${escapedCursor}`);

        for (const row of fetchResult.rows) {
          rank++;
          const result = this.transformSearchResult(row, options, rank);
          yield result;
          this.emit('result', result);
        }

        // A short (or empty) batch means the cursor is exhausted.
        if (fetchResult.rows.length < batchSize) {
          break;
        }
      }

      await client.query('COMMIT');
      committed = true;
    } finally {
      // Reached on errors AND when the consumer abandons the generator; either
      // way the client must not be returned to the pool mid-transaction.
      if (!committed) {
        try {
          await client.query('ROLLBACK');
        } catch {
          // Keep the original error (if any) rather than the rollback failure
        }
      }
    }
  }

  /**
   * Stream results using OFFSET/LIMIT pagination.
   *
   * Stops after `options.k` rows when `k` is set. Note that ordering by
   * distance alone is not a total order, so rows with equal distance may
   * repeat or be skipped across pages.
   */
  private async *streamWithPagination(
    client: PoolClient,
    options: StreamSearchOptions,
    batchSize: number
  ): AsyncGenerator<VectorSearchResult, void, undefined> {
    const { sql: baseSql, params } = this.buildSearchQuery(options, true);
    const maxResults = this.resolveLimit(options.k) ?? Number.POSITIVE_INFINITY;

    let offset = 0;
    let rank = 0;

    while (rank < maxResults) {
      // Wait if paused (backpressure)
      await this.waitIfPaused();

      const pageSize = Math.min(batchSize, maxResults - rank);
      const page = await client.query<{
        id: string | number;
        distance: number;
        [key: string]: unknown;
      }>(`${baseSql} LIMIT ${pageSize} OFFSET ${offset}`, params);

      for (const row of page.rows) {
        rank++;
        const searchResult = this.transformSearchResult(row, options, rank);
        yield searchResult;
        this.emit('result', searchResult);
      }

      offset += page.rows.length;
      if (page.rows.length < pageSize) {
        break;
      }
    }
  }

  /**
   * Build the search query SQL.
   *
   * @param options - Search options
   * @param forPagination - When true no LIMIT is appended (the caller adds
   *   LIMIT/OFFSET per page)
   * @returns SQL text and its positional parameters
   */
  private buildSearchQuery(
    options: StreamSearchOptions,
    forPagination = false
  ): { sql: string; params: unknown[] } {
    const tableName = options.tableName ?? this.defaultTableName;
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const metric = options.metric ?? 'cosine';
    const operator = DISTANCE_OPERATORS[metric] ?? '<=>';
    const queryVector = this.formatVector(options.query);
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
    const limit = this.resolveLimit(options.k);
    const escapedVectorColumn = this.escapeIdentifier(vectorColumn);

    // Build SELECT columns. `selectColumns` entries are inserted verbatim (not
    // escaped), so they must come from trusted code.
    const columnList = [...(options.selectColumns ?? ['id'])];
    if (options.includeVector) {
      // Escaped so the reference matches the one used in the distance expression.
      columnList.push(escapedVectorColumn);
    }
    if (options.includeMetadata) {
      columnList.push('metadata');
    }
    const distanceExpr = `${escapedVectorColumn} ${operator} '${queryVector}'::vector`;
    columnList.push(`(${distanceExpr}) as distance`);

    // Build WHERE clause
    const whereClauses: string[] = [];
    const params: unknown[] = [];
    const bind = (value: unknown): string => `$${params.push(value)}`;

    if (options.threshold !== undefined) {
      if (metric === 'cosine' || metric === 'dot') {
        // Threshold is a similarity for these metrics, hence 1 - distance.
        whereClauses.push(`(1 - (${distanceExpr})) >= ${bind(options.threshold)}`);
      } else {
        whereClauses.push(`(${distanceExpr}) <= ${bind(options.threshold)}`);
      }
    }
    if (options.maxDistance !== undefined) {
      whereClauses.push(`(${distanceExpr}) <= ${bind(options.maxDistance)}`);
    }
    if (options.filter) {
      for (const [key, value] of Object.entries(options.filter)) {
        if (key === 'metadata') {
          whereClauses.push(`metadata @> ${bind(JSON.stringify(value))}::jsonb`);
        } else {
          whereClauses.push(`${this.escapeIdentifier(key)} = ${bind(value)}`);
        }
      }
    }

    // Build query
    let sql = `SELECT ${columnList.join(', ')} FROM ${schemaPrefix}${this.escapeIdentifier(tableName)}`;
    if (whereClauses.length > 0) {
      sql += ` WHERE ${whereClauses.join(' AND ')}`;
    }
    sql += ` ORDER BY ${distanceExpr} ASC`;
    if (!forPagination && limit !== undefined) {
      sql += ` LIMIT ${limit}`;
    }
    return { sql, params };
  }

  /**
   * Transform a database row into a VectorSearchResult.
   *
   * The score is `1 - distance` for cosine/dot and `1 / (1 + distance)` for
   * every other metric.
   */
  private transformSearchResult(
    row: { id: string | number; distance: number; [key: string]: unknown },
    options: StreamSearchOptions,
    rank: number
  ): VectorSearchResult {
    const metric = options.metric ?? 'cosine';
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const score = metric === 'cosine' || metric === 'dot' ? 1 - row.distance : 1 / (1 + row.distance);

    const result: VectorSearchResult = {
      id: row.id,
      score,
      distance: row.distance,
      rank,
      retrievedAt: new Date(),
    };
    if (options.includeVector && row[vectorColumn]) {
      (result as { vector?: number[] }).vector = this.parseVector(row[vectorColumn] as string);
    }
    if (options.includeMetadata && row.metadata) {
      (result as { metadata?: Record<string, unknown> }).metadata = row.metadata as Record<string, unknown>;
    }
    return result;
  }

  // ===========================================================================
  // Stream Insert
  // ===========================================================================

  /**
   * Stream batch inserts for large datasets.
   *
   * Emits `insert` after each yielded result has been consumed, and `complete`
   * with `{ totalProcessed, batches }` once the input is exhausted. `batches`
   * is the number of batches actually sent (0 for an empty input).
   *
   * @param vectors - Async iterable of vector entries
   * @param options - Insert configuration options
   * @yields {InsertResult} Individual insert results
   * @throws {RangeError} If `batchSize` is not a positive integer
   */
  async *streamInsert(
    vectors: AsyncIterable<VectorEntry>,
    options: {
      tableName?: string;
      vectorColumn?: string;
      batchSize?: number;
      upsert?: boolean;
      conflictColumns?: string[];
    } = {}
  ): AsyncGenerator<InsertResult, void, undefined> {
    const tableName = options.tableName ?? this.defaultTableName;
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const batchSize = this.requireInteger(options.batchSize ?? DEFAULT_BATCH_SIZE, 'batchSize', 1);
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';

    let batchesSent = 0;
    let totalProcessed = 0;

    const client = await this.pool.connect();
    this.activeClient = client;

    try {
      for await (const chunk of this.chunkEntries(vectors, batchSize)) {
        const results = await this.insertBatch(
          client,
          chunk,
          tableName,
          vectorColumn,
          schemaPrefix,
          batchesSent,
          options.upsert,
          options.conflictColumns
        );
        batchesSent++;
        for (const result of results) {
          yield result;
          totalProcessed++;
          this.emit('insert', result);
        }
      }
      this.emit('complete', { totalProcessed, batches: batchesSent });
    } finally {
      client.release();
      if (this.activeClient === client) {
        this.activeClient = null;
      }
    }
  }

  /**
   * Group incoming entries into arrays of at most `size` items, waiting on
   * the backpressure gate before accepting each entry.
   */
  private async *chunkEntries(
    entries: AsyncIterable<VectorEntry>,
    size: number
  ): AsyncGenerator<VectorEntry[], void, undefined> {
    let chunk: VectorEntry[] = [];
    for await (const entry of entries) {
      // Wait if paused (backpressure)
      await this.waitIfPaused();
      chunk.push(entry);
      if (chunk.length >= size) {
        yield chunk;
        chunk = [];
      }
    }
    if (chunk.length > 0) {
      yield chunk;
    }
  }

  /**
   * Insert a batch of vectors.
   *
   * Tries one multi-row INSERT first. If that fails for any reason, each item
   * is retried on its own with the same statement shape (auto-generated id,
   * upsert clause), so a single bad row only fails itself.
   *
   * @returns One result per row returned (batch path) or per item (fallback path)
   */
  private async insertBatch(
    client: PoolClient,
    batch: VectorEntry[],
    tableName: string,
    vectorColumn: string,
    schemaPrefix: string,
    batchIndex: number,
    upsert?: boolean,
    conflictColumns?: string[]
  ): Promise<InsertResult[]> {
    if (batch.length === 0) {
      // "VALUES" with no tuples is a syntax error; nothing to do.
      return [];
    }

    try {
      const { sql, params } = this.buildInsertStatement(
        batch, tableName, vectorColumn, schemaPrefix, upsert, conflictColumns
      );
      const inserted = await client.query<{ id: string | number }>(sql, params);
      return inserted.rows.map((row, itemIndex) => ({
        id: row.id,
        success: true,
        batchIndex,
        itemIndex,
      }));
    } catch {
      // Fall through to per-item inserts below.
    }

    const results: InsertResult[] = [];
    for (const [itemIndex, item] of batch.entries()) {
      try {
        const { sql, params } = this.buildInsertStatement(
          [item], tableName, vectorColumn, schemaPrefix, upsert, conflictColumns
        );
        const single = await client.query<{ id: string | number }>(sql, params);
        results.push({
          id: single.rows[0]?.id ?? item.id ?? 'unknown',
          success: true,
          batchIndex,
          itemIndex,
        });
      } catch (itemError) {
        results.push({
          id: item.id ?? 'unknown',
          success: false,
          error: itemError instanceof Error ? itemError.message : String(itemError),
          batchIndex,
          itemIndex,
        });
      }
    }
    return results;
  }

  /**
   * Build an `INSERT ... RETURNING id` statement for one or more entries.
   * Entries without an id get `gen_random_uuid()`; conflict columns are
   * escaped as identifiers.
   */
  private buildInsertStatement(
    items: VectorEntry[],
    tableName: string,
    vectorColumn: string,
    schemaPrefix: string,
    upsert?: boolean,
    conflictColumns?: string[]
  ): { sql: string; params: unknown[] } {
    const column = this.escapeIdentifier(vectorColumn);
    const tuples: string[] = [];
    const params: unknown[] = [];

    for (const item of items) {
      const vector = this.formatVector(item.vector);
      const metadata = item.metadata ? JSON.stringify(item.metadata) : null;
      if (item.id != null) {
        params.push(item.id, metadata);
        tuples.push(`($${params.length - 1}, '${vector}'::vector, $${params.length}::jsonb)`);
      } else {
        params.push(metadata);
        tuples.push(`(gen_random_uuid(), '${vector}'::vector, $${params.length}::jsonb)`);
      }
    }

    let sql =
      `INSERT INTO ${schemaPrefix}${this.escapeIdentifier(tableName)} ` +
      `(id, ${column}, metadata) VALUES ${tuples.join(', ')}`;
    if (upsert) {
      const conflictTarget = (conflictColumns ?? ['id'])
        .map((name) => this.escapeIdentifier(name))
        .join(', ');
      sql += ` ON CONFLICT (${conflictTarget}) DO UPDATE SET `;
      sql += `${column} = EXCLUDED.${column}, `;
      sql += `metadata = EXCLUDED.metadata`;
    }
    sql += ' RETURNING id';
    return { sql, params };
  }

  // ===========================================================================
  // Backpressure Handling
  // ===========================================================================

  /**
   * Pause the stream (backpressure). Generators stop before their next
   * fetch/insert step until {@link RuVectorStream.resume} is called.
   */
  pause(): void {
    this.state.paused = true;
    this.emit('pause');
  }

  /**
   * Resume the stream and release any generator waiting on the pause gate.
   */
  resume(): void {
    this.state.paused = false;
    const release = this.state.drainResolve;
    if (release) {
      this.state.drainResolve = null;
      this.state.drainPromise = null;
      release();
    }
    this.emit('resume');
  }

  /**
   * Check if stream is paused.
   */
  isPaused(): boolean {
    return this.state.paused;
  }

  /**
   * Wait if the stream is paused. All waiters share one promise that is
   * resolved by `resume()`.
   */
  private async waitIfPaused(): Promise<void> {
    if (!this.state.paused) {
      return;
    }
    if (!this.state.drainPromise) {
      this.state.drainPromise = new Promise<void>((resolve) => {
        this.state.drainResolve = resolve;
      });
    }
    await this.state.drainPromise;
  }

  // ===========================================================================
  // Cleanup
  // ===========================================================================

  /**
   * Abort all active operations: closes every registered cursor on the active
   * client (errors ignored) and emits `abort`.
   */
  async abort(): Promise<void> {
    const client = this.activeClient;
    if (client) {
      for (const cursorName of Array.from(this.activeCursors)) {
        try {
          await client.query(`CLOSE ${this.escapeIdentifier(cursorName)}`);
        } catch {
          // Ignore errors
        }
      }
      this.activeCursors.clear();
    }
    this.emit('abort');
  }

  // ===========================================================================
  // Utility Methods
  // ===========================================================================

  /**
   * Validate that `value` is an integer >= `min`. Used for numbers that are
   * interpolated into SQL text.
   *
   * @throws {RangeError} If the value is not an integer or is below `min`
   */
  private requireInteger(value: number, name: string, min: number): number {
    if (!Number.isInteger(value) || value < min) {
      throw new RangeError(`${name} must be an integer >= ${min}, received: ${String(value)}`);
    }
    return value;
  }

  /**
   * Normalise the `k` option: `undefined`, `null` and `0` mean "no limit";
   * anything else must be a positive integer.
   */
  private resolveLimit(k: number | undefined | null): number | undefined {
    if (k === undefined || k === null || k === 0) {
      return undefined;
    }
    return this.requireInteger(k, 'k', 1);
  }

  /**
   * Format vector for SQL as `[v1,v2,...]`.
   *
   * The text is embedded in a quoted SQL literal, so every element must be a
   * finite number; anything else is rejected instead of being interpolated.
   *
   * @throws {TypeError} If an element is not a finite number
   */
  private formatVector(vector: number[] | Float32Array): string {
    const parts = Array.from(vector as ArrayLike<unknown>, (value, index) => {
      if (typeof value !== 'number' || !Number.isFinite(value)) {
        throw new TypeError(`Vector element at index ${index} is not a finite number`);
      }
      return String(value);
    });
    return `[${parts.join(',')}]`;
  }

  /**
   * Parse vector from SQL result text such as `[1,2,3]` or `{1,2,3}`.
   * An empty vector literal yields an empty array.
   */
  private parseVector(vectorStr: string): number[] {
    const cleaned = vectorStr.replace(/[\[\]{}]/g, '').trim();
    if (cleaned === '') {
      // ''.split(',') is [''] and Number('') is 0, which would fabricate a [0].
      return [];
    }
    return cleaned.split(',').map(Number);
  }

  /**
   * Escape SQL identifier by double-quoting it and doubling embedded quotes.
   */
  private escapeIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }
}
// ============================================================================
// RuVectorTransaction Class
// ============================================================================
/**
 * Enhanced transaction support for RuVector operations.
 *
 * Provides transaction management with:
 * - Isolation levels (read_committed, repeatable_read, serializable)
 * - Savepoints for partial rollback
 * - Vector operations within transaction context
 *
 * Events emitted: `begin`, `savepoint`, `rollback_to_savepoint`,
 * `release_savepoint`, `commit`, `rollback`.
 *
 * @example
 * ```typescript
 * const tx = new RuVectorTransaction(client);
 * await tx.begin('serializable');
 *
 * try {
 *   await tx.savepoint('before_insert');
 *   await tx.insert({ tableName: 'vectors', vectors: [...] });
 *
 *   const results = await tx.search({ query: vector, k: 10 });
 *
 *   if (results.length === 0) {
 *     await tx.rollbackToSavepoint('before_insert');
 *   }
 *
 *   await tx.commit();
 * } catch (error) {
 *   await tx.rollback();
 *   throw error;
 * }
 * ```
 */
export class RuVectorTransaction extends EventEmitter {
  /** SQL keyword form of each supported isolation level. */
  private static readonly ISOLATION_SQL: ReadonlyMap<string, string> = new Map([
    ['read_committed', 'READ COMMITTED'],
    ['repeatable_read', 'REPEATABLE READ'],
    ['serializable', 'SERIALIZABLE'],
  ]);

  private readonly client: PoolClient;
  private readonly schema?: string;
  private readonly defaultTableName: string;
  private transactionId: string | null = null;
  private isActive = false;
  private savepoints: Set<string> = new Set();
  private queryCount = 0;
  private startTime: number | null = null;

  /**
   * @param client - Dedicated client the transaction runs on
   * @param options - Optional schema and default table name (`'vectors'`)
   */
  constructor(
    client: PoolClient,
    options: {
      schema?: string;
      defaultTableName?: string;
    } = {}
  ) {
    super();
    this.client = client;
    this.schema = options.schema;
    this.defaultTableName = options.defaultTableName ?? 'vectors';
  }

  // ===========================================================================
  // Transaction Control
  // ===========================================================================

  /**
   * Begin a transaction with optional isolation level.
   *
   * Transaction state is only recorded once BEGIN has succeeded.
   *
   * @param isolation - Transaction isolation level
   * @throws {Error} If a transaction is already active or the isolation level
   *   is not one of the supported values
   */
  async begin(isolation?: IsolationLevel): Promise<void> {
    if (this.isActive) {
      throw new Error('Transaction already active');
    }

    let sql = 'BEGIN';
    if (isolation) {
      // Look the level up instead of interpolating caller input into SQL.
      const level = RuVectorTransaction.ISOLATION_SQL.get(isolation);
      if (level === undefined) {
        throw new Error(`Unsupported isolation level: ${String(isolation)}`);
      }
      sql += ` ISOLATION LEVEL ${level}`;
    }

    const startedAt = Date.now();
    const transactionId = `tx_${startedAt}_${Math.random().toString(36).slice(2)}`;
    await this.client.query(sql);

    this.transactionId = transactionId;
    this.startTime = startedAt;
    this.isActive = true;
    this.queryCount = 1;
    this.emit('begin', { transactionId: this.transactionId, isolation });
  }

  /**
   * Create a savepoint within the transaction.
   *
   * @param name - Savepoint name
   */
  async savepoint(name: string): Promise<void> {
    this.ensureActive();
    await this.client.query(`SAVEPOINT ${this.escapeIdentifier(name)}`);
    // Re-insert so the set's iteration order reflects the most recent creation.
    this.savepoints.delete(name);
    this.savepoints.add(name);
    this.queryCount++;
    this.emit('savepoint', { transactionId: this.transactionId, name });
  }

  /**
   * Rollback to a savepoint. The named savepoint stays usable; savepoints
   * created after it are destroyed by the server and are forgotten here too.
   *
   * @param name - Savepoint name
   * @throws {Error} If the savepoint is not known to this transaction
   */
  async rollbackToSavepoint(name: string): Promise<void> {
    this.ensureActive();
    if (!this.savepoints.has(name)) {
      throw new Error(`Savepoint '${name}' does not exist`);
    }
    await this.client.query(`ROLLBACK TO SAVEPOINT ${this.escapeIdentifier(name)}`);
    this.forgetSavepointsFrom(name, false);
    this.queryCount++;
    this.emit('rollback_to_savepoint', { transactionId: this.transactionId, name });
  }

  /**
   * Release a savepoint. Releasing also destroys every savepoint created
   * after the named one, so those are forgotten as well.
   *
   * @param name - Savepoint name
   * @throws {Error} If the savepoint is not known to this transaction
   */
  async releaseSavepoint(name: string): Promise<void> {
    this.ensureActive();
    if (!this.savepoints.has(name)) {
      throw new Error(`Savepoint '${name}' does not exist`);
    }
    await this.client.query(`RELEASE SAVEPOINT ${this.escapeIdentifier(name)}`);
    this.forgetSavepointsFrom(name, true);
    this.queryCount++;
    this.emit('release_savepoint', { transactionId: this.transactionId, name });
  }

  /**
   * Commit the transaction and reset local state.
   */
  async commit(): Promise<void> {
    this.ensureActive();
    await this.client.query('COMMIT');
    this.emit('commit', {
      transactionId: this.transactionId,
      queryCount: this.queryCount,
      durationMs: this.elapsedMs(),
    });
    this.cleanup();
  }

  /**
   * Rollback the transaction. A no-op when no transaction is active.
   */
  async rollback(): Promise<void> {
    if (!this.isActive) {
      return; // Already rolled back or not started
    }
    await this.client.query('ROLLBACK');
    this.emit('rollback', {
      transactionId: this.transactionId,
      queryCount: this.queryCount,
      durationMs: this.elapsedMs(),
    });
    this.cleanup();
  }

  // ===========================================================================
  // Vector Operations within Transaction
  // ===========================================================================

  /**
   * Perform vector search within the transaction.
   *
   * @returns Results ranked from 1 in the order returned by the database
   */
  async search(options: VectorSearchOptions): Promise<VectorSearchResult[]> {
    this.ensureActive();
    const { sql, params } = this.buildSearchQuery(options);
    const result = await this.client.query<{
      id: string | number;
      distance: number;
      [key: string]: unknown;
    }>(sql, params);
    this.queryCount++;

    const metric = options.metric ?? 'cosine';
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const similarityMetric = metric === 'cosine' || metric === 'dot';

    return result.rows.map((row, index) => {
      const searchResult: VectorSearchResult = {
        id: row.id,
        score: similarityMetric ? 1 - row.distance : 1 / (1 + row.distance),
        distance: row.distance,
        rank: index + 1,
        retrievedAt: new Date(),
      };
      if (options.includeVector && row[vectorColumn]) {
        (searchResult as { vector?: number[] }).vector = this.parseVector(row[vectorColumn] as string);
      }
      if (options.includeMetadata && row.metadata) {
        (searchResult as { metadata?: Record<string, unknown> }).metadata =
          row.metadata as Record<string, unknown>;
      }
      return searchResult;
    });
  }

  /**
   * Insert vectors within the transaction using one multi-row INSERT.
   *
   * Failures are reported through the returned `errors`/`failed` fields
   * rather than thrown. Callers should check them: in PostgreSQL a failed
   * statement leaves the enclosing transaction in an aborted state.
   */
  async insert(options: VectorInsertOptions): Promise<BatchResult<string>> {
    this.ensureActive();
    const startTime = Date.now();
    const total = options.vectors.length;

    if (total === 0) {
      // "VALUES" with no tuples is a syntax error; report an empty batch instead.
      return {
        total: 0,
        successful: 0,
        failed: 0,
        results: [],
        errors: undefined,
        durationMs: 0,
        throughput: 0,
      };
    }

    const tableName = options.tableName ?? this.defaultTableName;
    const column = this.escapeIdentifier(options.vectorColumn ?? 'embedding');
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';

    const successful: string[] = [];
    const errors: Array<{ index: number; message: string; input?: unknown }> = [];

    try {
      // Build multi-row INSERT
      const tuples: string[] = [];
      const params: unknown[] = [];
      for (const item of options.vectors) {
        const vector = this.formatVector(item.vector);
        const metadata = item.metadata ? JSON.stringify(item.metadata) : null;
        if (item.id !== undefined) {
          params.push(item.id, metadata);
          tuples.push(`($${params.length - 1}, '${vector}'::vector, $${params.length}::jsonb)`);
        } else {
          params.push(metadata);
          tuples.push(`(gen_random_uuid(), '${vector}'::vector, $${params.length}::jsonb)`);
        }
      }

      let sql = `INSERT INTO ${schemaPrefix}${this.escapeIdentifier(tableName)} `;
      sql += `(id, ${column}, metadata) VALUES ${tuples.join(', ')}`;
      if (options.upsert) {
        const conflictTarget = (options.conflictColumns ?? ['id'])
          .map((name: string) => this.escapeIdentifier(name))
          .join(', ');
        sql += ` ON CONFLICT (${conflictTarget}) DO UPDATE SET `;
        sql += `${column} = EXCLUDED.${column}, `;
        sql += `metadata = EXCLUDED.metadata`;
      }
      sql += ' RETURNING id';

      const result = await this.client.query<{ id: string }>(sql, params);
      this.queryCount++;
      if (result.rows) {
        successful.push(...result.rows.map((row) => String(row.id)));
      }
    } catch (error) {
      errors.push({
        index: 0,
        message: error instanceof Error ? error.message : String(error),
      });
    }

    const durationMs = Date.now() - startTime;
    const insertedCount = successful.length;
    return {
      total,
      successful: insertedCount,
      failed: total - insertedCount,
      results: successful,
      errors: errors.length > 0 ? errors : undefined,
      durationMs,
      // Clamp to 1 ms so a sub-millisecond insert does not divide by zero.
      throughput: insertedCount === 0 ? 0 : insertedCount / (Math.max(durationMs, 1) / 1000),
    };
  }

  /**
   * Update a vector within the transaction.
   *
   * @returns `true` when at least one row was updated; `false` when nothing
   *   was requested (no vector and no metadata) or no row matched
   */
  async update(options: VectorUpdateOptions): Promise<boolean> {
    this.ensureActive();
    const tableName = options.tableName ?? this.defaultTableName;
    const column = this.escapeIdentifier(options.vectorColumn ?? 'embedding');
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';

    const assignments: string[] = [];
    const params: unknown[] = [];

    if (options.vector) {
      assignments.push(`${column} = '${this.formatVector(options.vector)}'::vector`);
    }
    if (options.metadata) {
      params.push(JSON.stringify(options.metadata));
      const placeholder = `$${params.length}::jsonb`;
      assignments.push(options.mergeMetadata ? `metadata = metadata || ${placeholder}` : `metadata = ${placeholder}`);
    }
    if (assignments.length === 0) {
      return false;
    }

    params.push(options.id);
    const sql =
      `UPDATE ${schemaPrefix}${this.escapeIdentifier(tableName)} ` +
      `SET ${assignments.join(', ')} WHERE id = $${params.length}`;
    const result = await this.client.query(sql, params);
    this.queryCount++;
    return (result.rowCount ?? 0) > 0;
  }

  /**
   * Delete vectors within the transaction.
   *
   * @param ids - IDs to delete; an empty list deletes nothing and sends no query
   * @param tableName - Table name (optional)
   * @returns Number of deleted rows
   */
  async delete(ids: (string | number)[], tableName?: string): Promise<number> {
    this.ensureActive();
    if (ids.length === 0) {
      // "IN ()" is a syntax error and would abort the surrounding transaction.
      return 0;
    }
    const table = tableName ?? this.defaultTableName;
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
    const placeholders = ids.map((_, i) => `$${i + 1}`).join(', ');
    const sql = `DELETE FROM ${schemaPrefix}${this.escapeIdentifier(table)} WHERE id IN (${placeholders})`;
    const result = await this.client.query(sql, ids);
    this.queryCount++;
    return result.rowCount ?? 0;
  }

  /**
   * Execute a raw query within the transaction.
   *
   * @param sql - SQL text, sent as-is
   * @param params - Optional positional parameters
   */
  async query<T = Record<string, unknown>>(
    sql: string,
    params?: unknown[]
  ): Promise<QueryResult<T>> {
    this.ensureActive();
    const startedAt = Date.now();
    const result = await this.client.query<T>(sql, params);
    this.queryCount++;
    return {
      rows: result.rows,
      rowCount: result.rowCount ?? 0,
      durationMs: Date.now() - startedAt,
      command: result.command,
    };
  }

  // ===========================================================================
  // Utility Methods
  // ===========================================================================

  /**
   * Get transaction status.
   */
  getStatus(): {
    transactionId: string | null;
    isActive: boolean;
    savepoints: string[];
    queryCount: number;
    durationMs: number;
  } {
    return {
      transactionId: this.transactionId,
      isActive: this.isActive,
      savepoints: Array.from(this.savepoints),
      queryCount: this.queryCount,
      durationMs: this.elapsedMs(),
    };
  }

  /**
   * Milliseconds since the transaction began, or 0 when none is running.
   */
  private elapsedMs(): number {
    return this.startTime ? Date.now() - this.startTime : 0;
  }

  /**
   * Ensure transaction is active.
   *
   * @throws {Error} If `begin()` has not been called or the transaction ended
   */
  private ensureActive(): void {
    if (!this.isActive) {
      throw new Error('Transaction is not active. Call begin() first.');
    }
  }

  /**
   * Drop savepoints created after `name` from local bookkeeping (the set
   * iterates in creation order); `includeNamed` also drops `name` itself.
   */
  private forgetSavepointsFrom(name: string, includeNamed: boolean): void {
    let reached = false;
    for (const existing of Array.from(this.savepoints)) {
      if (existing === name) {
        reached = true;
        if (!includeNamed) {
          continue;
        }
      }
      if (reached) {
        this.savepoints.delete(existing);
      }
    }
  }

  /**
   * Build search query SQL, including the `threshold` / `maxDistance` / `filter`
   * conditions and a `LIMIT k`.
   *
   * @throws {RangeError} If `k` is not a non-negative integer
   */
  private buildSearchQuery(options: VectorSearchOptions): { sql: string; params: unknown[] } {
    // k is interpolated into SQL text, so it must be a real integer.
    if (!Number.isInteger(options.k) || options.k < 0) {
      throw new RangeError(`k must be a non-negative integer, received: ${String(options.k)}`);
    }

    const tableName = options.tableName ?? this.defaultTableName;
    const metric = options.metric ?? 'cosine';
    const operator = DISTANCE_OPERATORS[metric] ?? '<=>';
    const queryVector = this.formatVector(options.query);
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
    const escapedVectorColumn = this.escapeIdentifier(options.vectorColumn ?? 'embedding');

    // `selectColumns` entries are inserted verbatim (not escaped), so they
    // must come from trusted code.
    const columnList = [...(options.selectColumns ?? ['id'])];
    if (options.includeVector) {
      // Escaped so the reference matches the one used in the distance expression.
      columnList.push(escapedVectorColumn);
    }
    if (options.includeMetadata) {
      columnList.push('metadata');
    }
    const distanceExpr = `${escapedVectorColumn} ${operator} '${queryVector}'::vector`;
    columnList.push(`(${distanceExpr}) as distance`);

    const whereClauses: string[] = [];
    const params: unknown[] = [];
    const bind = (value: unknown): string => `$${params.push(value)}`;

    // Same distance conditions as the streaming search, so both entry points
    // honour these options.
    if (options.threshold !== undefined) {
      if (metric === 'cosine' || metric === 'dot') {
        whereClauses.push(`(1 - (${distanceExpr})) >= ${bind(options.threshold)}`);
      } else {
        whereClauses.push(`(${distanceExpr}) <= ${bind(options.threshold)}`);
      }
    }
    if (options.maxDistance !== undefined) {
      whereClauses.push(`(${distanceExpr}) <= ${bind(options.maxDistance)}`);
    }
    if (options.filter) {
      for (const [key, value] of Object.entries(options.filter)) {
        if (key === 'metadata') {
          whereClauses.push(`metadata @> ${bind(JSON.stringify(value))}::jsonb`);
        } else {
          whereClauses.push(`${this.escapeIdentifier(key)} = ${bind(value)}`);
        }
      }
    }

    let sql = `SELECT ${columnList.join(', ')} FROM ${schemaPrefix}${this.escapeIdentifier(tableName)}`;
    if (whereClauses.length > 0) {
      sql += ` WHERE ${whereClauses.join(' AND ')}`;
    }
    sql += ` ORDER BY ${distanceExpr} ASC`;
    sql += ` LIMIT ${options.k}`;
    return { sql, params };
  }

  /**
   * Cleanup transaction state.
   */
  private cleanup(): void {
    this.isActive = false;
    this.savepoints.clear();
    this.transactionId = null;
    this.startTime = null;
  }

  /**
   * Format vector for SQL as `[v1,v2,...]`.
   *
   * The text is embedded in a quoted SQL literal, so every element must be a
   * finite number; anything else is rejected instead of being interpolated.
   *
   * @throws {TypeError} If an element is not a finite number
   */
  private formatVector(vector: number[] | Float32Array): string {
    const parts = Array.from(vector as ArrayLike<unknown>, (value, index) => {
      if (typeof value !== 'number' || !Number.isFinite(value)) {
        throw new TypeError(`Vector element at index ${index} is not a finite number`);
      }
      return String(value);
    });
    return `[${parts.join(',')}]`;
  }

  /**
   * Parse vector from SQL result text such as `[1,2,3]` or `{1,2,3}`.
   * An empty vector literal yields an empty array.
   */
  private parseVector(vectorStr: string): number[] {
    const cleaned = vectorStr.replace(/[\[\]{}]/g, '').trim();
    if (cleaned === '') {
      // ''.split(',') is [''] and Number('') is 0, which would fabricate a [0].
      return [];
    }
    return cleaned.split(',').map(Number);
  }

  /**
   * Escape SQL identifier by double-quoting it and doubling embedded quotes.
   */
  private escapeIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }
}
// ============================================================================
// BatchProcessor Class
// ============================================================================
/**
 * Batch processor for large dataset operations.
 *
 * Groups items from an async source into batches, runs a caller-supplied
 * processor over them with bounded concurrency and per-batch retries, and
 * can fan out multiple vector similarity searches over a connection pool.
 *
 * Events emitted:
 * - `batch_complete` `{ batchIndex, attempt, success }` after a batch succeeds
 * - `batch_error` `{ batchIndex, attempt, error }` after each failed attempt
 * - `batch_search_complete` `{ batchStart, batchEnd, total }` after each wave
 *   of `parallelSearch`
 *
 * @example
 * ```typescript
 * const processor = new BatchProcessor(pool, { batchSize: 500, concurrency: 4 });
 *
 * async function* loadData() {
 *   for (const item of massiveDataset) {
 *     yield item;
 *   }
 * }
 *
 * for await (const result of processor.processBatch(loadData(), async (batch) => {
 *   return batch.map(item => processItem(item));
 * })) {
 *   console.log(result);
 * }
 * ```
 */
export class BatchProcessor extends EventEmitter {
  private readonly pool: Pool;
  private readonly options: Required<BatchOptions>;
  private readonly schema?: string;

  /**
   * @param pool - Connection pool used by `parallelSearch`
   * @param options - Batch settings plus an optional schema name that is
   *   prefixed to the table in generated search queries
   */
  constructor(
    pool: Pool,
    options: BatchOptions & { schema?: string } = {}
  ) {
    super();
    this.pool = pool;
    this.schema = options.schema;
    this.options = {
      batchSize: options.batchSize ?? DEFAULT_BATCH_SIZE,
      concurrency: options.concurrency ?? DEFAULT_CONCURRENCY,
      retryOnFailure: options.retryOnFailure ?? true,
      maxRetries: options.maxRetries ?? 3,
      useTransaction: options.useTransaction ?? false,
    };
  }

  /**
   * Process items in batches with a custom processor function.
   *
   * Full batches are dispatched as soon as they fill up. Once `concurrency`
   * batches are in flight, the generator waits for whichever finishes first
   * and yields its results before reading more input. After the input is
   * exhausted, a trailing partial batch is dispatched alongside the batches
   * still in flight and all outstanding results are yielded in batch order.
   *
   * @param items - Async iterable of items to process
   * @param processor - Batch processing function
   * @param options - Per-call overrides for batch size and concurrency, and
   *   an optional callback invoked after each batch succeeds
   * @yields Processed results
   * @throws {Error} If a batch still fails after its final attempt
   */
  async *processBatch<T, R>(
    items: AsyncIterable<T>,
    processor: (batch: T[]) => Promise<R[]>,
    options?: {
      batchSize?: number;
      concurrency?: number;
      onBatchComplete?: (batchIndex: number, results: R[]) => void;
    }
  ): AsyncGenerator<R, void, undefined> {
    const batchSize = BatchProcessor.atLeastOne(options?.batchSize ?? this.options.batchSize);
    const concurrency = BatchProcessor.atLeastOne(options?.concurrency ?? this.options.concurrency);
    const inFlight = new Map<number, Promise<{ index: number; results: R[] }>>();

    const dispatch = (chunk: T[], index: number): void => {
      const task = this.processSingleBatch(chunk, processor, index).then(results => {
        options?.onBatchComplete?.(index, results);
        return { index, results };
      });
      // Mark the task as handled so a failure in a batch that is never awaited
      // (the consumer stopped iterating, or another batch failed first) does
      // not surface as an unhandled rejection. Awaiting `task` below still
      // rethrows the error to the consumer.
      task.catch(() => undefined);
      inFlight.set(index, task);
    };

    let batch: T[] = [];
    let batchIndex = 0;

    for await (const item of items) {
      batch.push(item);
      if (batch.length < batchSize) {
        continue;
      }

      dispatch(batch, batchIndex++);
      batch = [];

      // Backpressure: do not pull more input until a slot frees up.
      if (inFlight.size >= concurrency) {
        const finished = await Promise.race(inFlight.values());
        inFlight.delete(finished.index);
        for (const result of finished.results) {
          yield result;
        }
      }
    }

    // The trailing partial batch joins the queue so it runs concurrently with
    // the remaining batches and is yielded after them, not ahead of them.
    if (batch.length > 0) {
      dispatch(batch, batchIndex);
    }

    const remaining = await Promise.all(inFlight.values());
    remaining.sort((a, b) => a.index - b.index);
    for (const { results } of remaining) {
      for (const result of results) {
        yield result;
      }
    }
  }

  /**
   * Perform parallel search across multiple queries.
   *
   * Queries are executed in consecutive waves of at most `concurrency`
   * searches; each wave completes before the next one starts.
   *
   * @param queries - Array of query vectors
   * @param options - Search options shared by every query
   * @returns Array of search results for each query, in input order
   */
  async parallelSearch(
    queries: number[][],
    options: Omit<VectorSearchOptions, 'query'>
  ): Promise<VectorSearchResult[][]> {
    // Clamp to >= 1: a step of 0 would never advance the loop below.
    const concurrency = BatchProcessor.atLeastOne(this.options.concurrency);
    const results: VectorSearchResult[][] = new Array(queries.length);

    for (let start = 0; start < queries.length; start += concurrency) {
      const wave = queries.slice(start, start + concurrency);
      // Promise.all preserves input order, so offsets map straight to indices.
      const waveResults = await Promise.all(
        wave.map(query =>
          this.executeSingleSearch({ ...options, query } as VectorSearchOptions)
        )
      );
      waveResults.forEach((searchResults, offset) => {
        results[start + offset] = searchResults;
      });

      this.emit('batch_search_complete', {
        batchStart: start,
        batchEnd: Math.min(start + concurrency, queries.length),
        total: queries.length,
      });
    }

    return results;
  }

  /**
   * Process a single batch with retry support.
   *
   * The processor is always invoked at least once. When `retryOnFailure` is
   * enabled it is invoked up to `maxRetries` times in total, sleeping with
   * exponential backoff (1s, 2s, 4s, ... capped at 10s) between attempts.
   *
   * @throws {Error} Wrapping the last failure once attempts are exhausted
   */
  private async processSingleBatch<T, R>(
    batch: T[],
    processor: (batch: T[]) => Promise<R[]>,
    batchIndex: number
  ): Promise<R[]> {
    // maxRetries <= 0 must still allow the initial attempt.
    const maxAttempts = BatchProcessor.atLeastOne(this.options.maxRetries);
    let attempt = 0;
    let lastError: Error | null = null;

    while (attempt < maxAttempts) {
      attempt++;
      try {
        const results = await processor(batch);
        this.emit('batch_complete', { batchIndex, attempt, success: true });
        return results;
      } catch (error) {
        // Normalize non-Error rejections so the final message is meaningful.
        lastError = error instanceof Error ? error : new Error(String(error));
        this.emit('batch_error', { batchIndex, attempt, error: lastError });
        if (!this.options.retryOnFailure || attempt >= maxAttempts) {
          break;
        }
        // Exponential backoff
        await this.sleep(Math.min(1000 * Math.pow(2, attempt - 1), 10000));
      }
    }

    throw new Error(`Batch ${batchIndex} failed after ${attempt} attempts: ${lastError?.message}`);
  }

  /**
   * Execute a single search query on a pooled client.
   *
   * The score is `1 - distance` for the `cosine` and `dot` metrics and
   * `1 / (1 + distance)` for every other metric. Ranks are 1-based and
   * follow the row order returned by the query.
   */
  private async executeSingleSearch(
    options: VectorSearchOptions
  ): Promise<VectorSearchResult[]> {
    // Build (and validate) first so bad input never checks out a connection.
    const { sql, params } = this.buildSearchQuery(options);
    const metric = options.metric ?? 'cosine';
    const client = await this.pool.connect();
    try {
      const result = await client.query<{
        id: string | number;
        distance: number;
        [key: string]: unknown;
      }>(sql, params);

      return result.rows.map((row, index) => {
        const score = metric === 'cosine' || metric === 'dot'
          ? 1 - row.distance
          : 1 / (1 + row.distance);
        return {
          id: row.id,
          score,
          distance: row.distance,
          rank: index + 1,
          retrievedAt: new Date(),
        };
      });
    } finally {
      client.release();
    }
  }

  /**
   * Build search query SQL.
   *
   * Schema, table, vector-column and filter-key names are quoted as
   * identifiers, filter values are bound as parameters, and `k` must be a
   * non-negative integer before it is written into `LIMIT`.
   *
   * NOTE: `selectColumns` entries are interpolated verbatim (so expressions
   * and aliases keep working); they must come from trusted code, never from
   * user input.
   *
   * @throws {RangeError} If `k` is not a non-negative safe integer
   * @throws {TypeError} If the query vector has a non-finite component
   */
  private buildSearchQuery(options: VectorSearchOptions): { sql: string; params: unknown[] } {
    const k: unknown = options.k;
    if (typeof k !== 'number' || !Number.isSafeInteger(k) || k < 0) {
      throw new RangeError(`Search option "k" must be a non-negative integer, received: ${String(k)}`);
    }

    const tableName = options.tableName ?? 'vectors';
    const vectorColumn = options.vectorColumn ?? 'embedding';
    const metric = options.metric ?? 'cosine';
    // Only accept a string operator from the lookup table; anything else
    // (unknown metric, inherited object property) falls back to '<=>'.
    const candidate: unknown = DISTANCE_OPERATORS[metric];
    const operator = typeof candidate === 'string' ? candidate : '<=>';
    const queryVector = this.formatVector(options.query);
    const schemaPrefix = this.schema ? `${this.escapeIdentifier(this.schema)}.` : '';
    const selectColumns = options.selectColumns ?? ['id'];
    const distanceExpr = `${this.escapeIdentifier(vectorColumn)} ${operator} '${queryVector}'::vector`;

    const whereClauses: string[] = [];
    const params: unknown[] = [];
    if (options.filter) {
      for (const [key, value] of Object.entries(options.filter)) {
        if (key === 'metadata') {
          // Metadata is matched by JSONB containment.
          params.push(JSON.stringify(value));
          whereClauses.push(`metadata @> $${params.length}::jsonb`);
        } else {
          params.push(value);
          whereClauses.push(`${this.escapeIdentifier(key)} = $${params.length}`);
        }
      }
    }
    const whereSql = whereClauses.length > 0 ? ` WHERE ${whereClauses.join(' AND ')}` : '';

    const sql =
      `SELECT ${selectColumns.join(', ')}, (${distanceExpr}) as distance ` +
      `FROM ${schemaPrefix}${this.escapeIdentifier(tableName)}${whereSql} ` +
      `ORDER BY ${distanceExpr} ASC ` +
      `LIMIT ${k}`;

    return { sql, params };
  }

  /**
   * Format vector for SQL as `[v1,v2,...]`.
   *
   * The literal is embedded in SQL text, so each component must be a finite
   * number.
   *
   * @throws {TypeError} If any component is not a finite number
   */
  private formatVector(vector: number[] | Float32Array): string {
    const components = Array.isArray(vector) ? vector : Array.from(vector);
    for (let i = 0; i < components.length; i++) {
      const component: unknown = components[i];
      if (typeof component !== 'number' || !Number.isFinite(component)) {
        throw new TypeError(`Vector component at index ${i} is not a finite number`);
      }
    }
    return `[${components.join(',')}]`;
  }

  /**
   * Quote a SQL identifier, doubling any embedded double quotes.
   */
  private escapeIdentifier(identifier: string): string {
    return `"${identifier.replace(/"/g, '""')}"`;
  }

  /**
   * Sleep utility.
   */
  private sleep(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Coerce a count-like setting to a whole number that is at least 1.
   * NaN, zero and negative values become 1; fractions are rounded down.
   */
  private static atLeastOne(value: number): number {
    return value >= 1 ? Math.floor(value) : 1;
  }
}
// ============================================================================
// PoolEventEmitter Class
// ============================================================================
/**
 * Re-emits connection pool lifecycle events under `pool:`-prefixed names.
 *
 * On construction it subscribes to the pool's `connect`, `acquire`,
 * `release`, `remove` and `error` events and forwards each one as
 * `pool:connect`, `pool:acquire`, `pool:release`, `pool:remove` and
 * `pool:error` respectively, with typed listener signatures.
 *
 * @example
 * ```typescript
 * const poolEvents = new PoolEventEmitter(pool);
 *
 * poolEvents.on('pool:connect', (client) => {
 *   console.log('Client connected');
 * });
 *
 * poolEvents.on('pool:error', (error, client) => {
 *   console.error('Pool error:', error);
 * });
 * ```
 */
export class PoolEventEmitter extends EventEmitter {
  private readonly pool: Pool;

  /**
   * @param pool - Pool whose events are forwarded and whose counters are
   *   reported by `getStats`
   */
  constructor(pool: Pool) {
    super();
    this.pool = pool;
    this.setupListeners();
  }

  /**
   * Register a listener for a typed pool event.
   */
  on<K extends keyof PoolEvents>(event: K, listener: PoolEvents[K]): this {
    const handler = listener as (...args: unknown[]) => void;
    return super.on(event, handler);
  }

  /**
   * Register a listener that fires at most once for a typed pool event.
   */
  once<K extends keyof PoolEvents>(event: K, listener: PoolEvents[K]): this {
    const handler = listener as (...args: unknown[]) => void;
    return super.once(event, handler);
  }

  /**
   * Unregister a previously registered typed listener.
   */
  off<K extends keyof PoolEvents>(event: K, listener: PoolEvents[K]): this {
    const handler = listener as (...args: unknown[]) => void;
    return super.off(event, handler);
  }

  /**
   * Emit a typed pool event with the arguments its listener type declares.
   */
  emit<K extends keyof PoolEvents>(
    event: K,
    ...args: Parameters<PoolEvents[K]>
  ): boolean {
    return super.emit(event, ...args);
  }

  /**
   * Snapshot the pool's `totalCount`, `idleCount` and `waitingCount`.
   */
  getStats(): {
    totalCount: number;
    idleCount: number;
    waitingCount: number;
  } {
    const { totalCount, idleCount, waitingCount } = this.pool;
    return { totalCount, idleCount, waitingCount };
  }

  /**
   * Subscribe to the pool and forward each event under its `pool:` name.
   */
  private setupListeners(): void {
    // Client lifecycle events all carry the client as their first argument.
    const relayClient = (poolEvent: string, forward: (client: PoolClient) => void): void => {
      this.pool.on(poolEvent, (client: unknown) => {
        forward(client as PoolClient);
      });
    };

    relayClient('connect', client => {
      this.emit('pool:connect', client);
    });
    relayClient('acquire', client => {
      this.emit('pool:acquire', client);
    });
    relayClient('release', client => {
      this.emit('pool:release', client);
    });
    relayClient('remove', client => {
      this.emit('pool:remove', client);
    });

    // Errors carry the error first and, optionally, the affected client.
    this.pool.on('error', (error: unknown, client?: unknown) => {
      this.emit('pool:error', error as Error, client as PoolClient | undefined);
    });
  }
}
// ============================================================================
// Factory Functions
// ============================================================================
/**
 * Factory for `RuVectorStream`.
 *
 * Equivalent to calling `new RuVectorStream(pool, options)`; both arguments
 * are passed through unchanged.
 *
 * @param pool - Connection pool handed to the stream
 * @param options - Optional settings (`schema`, `defaultTableName`,
 *   `highWaterMark`) forwarded to the constructor
 * @returns The newly constructed stream
 */
export function createRuVectorStream(
  pool: Pool,
  options?: {
    schema?: string;
    defaultTableName?: string;
    highWaterMark?: number;
  }
): RuVectorStream {
  const stream = new RuVectorStream(pool, options);
  return stream;
}
/**
 * Factory for `RuVectorTransaction`.
 *
 * Equivalent to calling `new RuVectorTransaction(client, options)`; both
 * arguments are passed through unchanged.
 *
 * @param client - Pool client handed to the transaction
 * @param options - Optional settings (`schema`, `defaultTableName`)
 *   forwarded to the constructor
 * @returns The newly constructed transaction wrapper
 */
export function createRuVectorTransaction(
  client: PoolClient,
  options?: {
    schema?: string;
    defaultTableName?: string;
  }
): RuVectorTransaction {
  const transaction = new RuVectorTransaction(client, options);
  return transaction;
}
/**
 * Factory for `BatchProcessor`.
 *
 * Equivalent to calling `new BatchProcessor(pool, options)`; both arguments
 * are passed through unchanged.
 *
 * @param pool - Connection pool handed to the processor
 * @param options - Optional batch settings plus an optional `schema`
 * @returns The newly constructed batch processor
 */
export function createBatchProcessor(
  pool: Pool,
  options?: BatchOptions & { schema?: string }
): BatchProcessor {
  const processor = new BatchProcessor(pool, options);
  return processor;
}
/**
 * Factory for `PoolEventEmitter`.
 *
 * Equivalent to calling `new PoolEventEmitter(pool)`.
 *
 * @param pool - Connection pool whose events the emitter forwards
 * @returns The newly constructed event emitter
 */
export function createPoolEventEmitter(pool: Pool): PoolEventEmitter {
  const emitter = new PoolEventEmitter(pool);
  return emitter;
}
// ============================================================================
// Default Export
// ============================================================================
/**
 * Default export: a single object bundling the four classes defined in this
 * module together with their `create*` factory functions, for consumers that
 * prefer one default import over individual named imports.
 */
export default {
RuVectorStream,
RuVectorTransaction,
BatchProcessor,
PoolEventEmitter,
createRuVectorStream,
createRuVectorTransaction,
createBatchProcessor,
createPoolEventEmitter,
};