UNPKG

@vfarcic/dot-ai

Version:

AI-powered development productivity platform that enhances software development workflows through intelligent automation and AI-driven assistance

297 lines (296 loc) 10.1 kB
"use strict"; /** * Embedding Migration Handler * * Handles POST /api/v1/embeddings/migrate requests. * Re-embeds all data in vector collections when switching embedding providers * (e.g., OpenAI 1536-dim → local TEI 384-dim). * * PRD #384: Optional Local Embedding Service */ Object.defineProperty(exports, "__esModule", { value: true }); exports.handleEmbeddingMigration = handleEmbeddingMigration; const embedding_service_1 = require("../core/embedding-service"); const plugin_registry_1 = require("../core/plugin-registry"); const PLUGIN_NAME = 'agentic-tools'; const BATCH_SIZE = 50; /** * Invoke a plugin tool and extract the result data */ async function invokePlugin(tool, args) { const response = await (0, plugin_registry_1.invokePluginTool)(PLUGIN_NAME, tool, args); if (!response.success) { const error = response.error; const message = typeof error === 'object' && error?.message ? error.message : String(error || `Plugin tool ${tool} failed`); throw new Error(message); } const toolResult = response.result; if (!toolResult || typeof toolResult !== 'object') { throw new Error(`Plugin tool ${tool} returned invalid result`); } if (toolResult.success === false) { throw new Error(toolResult.error || toolResult.message || `Plugin tool ${tool} failed`); } return toolResult.data; } /** * Migrate a single collection: read all points, re-embed, recreate collection, store */ async function migrateCollection(collectionName, embeddingService, targetDimensions, logger, requestId) { // Get current collection stats const stats = await invokePlugin('collection_stats', { collection: collectionName, }); if (!stats.exists) { return { collection: collectionName, status: 'failed', previousDimensions: 0, newDimensions: targetDimensions, total: 0, processed: 0, failed: 0, error: `Collection '${collectionName}' does not exist`, }; } const previousDimensions = stats.vectorSize; // Skip if dimensions already match if (previousDimensions === targetDimensions) { return { collection: collectionName, status: 'skipped', previousDimensions, newDimensions: targetDimensions, total: stats.pointsCount, processed: 0, failed: 0, }; } logger.info('Starting collection migration', { requestId, collection: collectionName, previousDimensions, targetDimensions, pointsCount: stats.pointsCount, }); // List all points (payload only, no vectors) const documents = await invokePlugin('vector_list', { collection: collectionName, limit: 10000, }); const total = documents.length; // Extract searchText from each document const pointsWithText = documents.map(doc => ({ id: doc.id, payload: doc.payload, searchText: doc.payload.searchText || '', })); // Filter out points without searchText const migrateablePoints = pointsWithText.filter(p => p.searchText.length > 0); const skippedNoText = total - migrateablePoints.length; if (skippedNoText > 0) { logger.warn('Some points have no searchText and will be skipped', { requestId, collection: collectionName, skippedNoText, }); } // Batch re-embed all texts const allTexts = migrateablePoints.map(p => p.searchText); const allEmbeddings = []; let failedEmbeddings = 0; for (let i = 0; i < allTexts.length; i += BATCH_SIZE) { const batch = allTexts.slice(i, i + BATCH_SIZE); try { const embeddings = await embeddingService.generateEmbeddings(batch); allEmbeddings.push(...embeddings); } catch (error) { logger.error('Batch embedding failed', error instanceof Error ? error : new Error(String(error)), { requestId, collection: collectionName, batchStart: i, batchSize: batch.length, }); // Fill with empty arrays to track failures for (let j = 0; j < batch.length; j++) { allEmbeddings.push([]); } failedEmbeddings += batch.length; } } // Recreate collection with new dimensions (initializeCollection auto-deletes on mismatch) await invokePlugin('collection_initialize', { collection: collectionName, vectorSize: targetDimensions, createTextIndex: true, }); // Store all re-embedded points let processed = 0; let failed = failedEmbeddings + skippedNoText; for (let i = 0; i < migrateablePoints.length; i++) { const point = migrateablePoints[i]; const embedding = allEmbeddings[i]; // Skip points where embedding failed if (!embedding || embedding.length === 0) { continue; } try { await invokePlugin('vector_store', { collection: collectionName, id: point.id, embedding, payload: point.payload, }); processed++; } catch (error) { logger.error('Failed to store re-embedded point', error instanceof Error ? error : new Error(String(error)), { requestId, collection: collectionName, pointId: point.id, }); failed++; } } logger.info('Collection migration complete', { requestId, collection: collectionName, total, processed, failed, }); return { collection: collectionName, status: 'migrated', previousDimensions, newDimensions: targetDimensions, total, processed, failed, }; } /** * Handle embedding migration request */ async function handleEmbeddingMigration(body, logger, requestId) { // Check plugin availability if (!(0, plugin_registry_1.isPluginInitialized)()) { return { success: false, error: { code: 'PLUGIN_UNAVAILABLE', message: 'Plugin system is not initialized. Vector database operations are unavailable.', }, meta: { timestamp: new Date().toISOString(), requestId, version: 'v1', }, }; } // Check embedding service availability const embeddingService = new embedding_service_1.EmbeddingService(); if (!embeddingService.isAvailable()) { const status = embeddingService.getStatus(); return { success: false, error: { code: 'EMBEDDING_SERVICE_UNAVAILABLE', message: `Embedding service is not available: ${status.reason || 'No embedding provider configured'}`, }, meta: { timestamp: new Date().toISOString(), requestId, version: 'v1', }, }; } const targetDimensions = embeddingService.getDimensions(); // Parse request body const parsedBody = body && typeof body === 'object' ? body : {}; const requestedCollection = typeof parsedBody.collection === 'string' ? parsedBody.collection : undefined; // Discover collections let collectionsToMigrate; if (requestedCollection) { collectionsToMigrate = [requestedCollection]; } else { try { collectionsToMigrate = await invokePlugin('collection_list', {}); } catch (error) { const message = error instanceof Error ? error.message : String(error); return { success: false, error: { code: 'MIGRATION_ERROR', message: `Failed to list collections: ${message}`, }, meta: { timestamp: new Date().toISOString(), requestId, version: 'v1', }, }; } } logger.info('Starting embedding migration', { requestId, targetDimensions, collections: collectionsToMigrate, requestedCollection: requestedCollection || 'all', }); // Migrate each collection const results = []; for (const collectionName of collectionsToMigrate) { try { const result = await migrateCollection(collectionName, embeddingService, targetDimensions, logger, requestId); results.push(result); } catch (error) { const message = error instanceof Error ? error.message : String(error); logger.error('Collection migration failed', error instanceof Error ? error : new Error(message), { requestId, collection: collectionName, }); results.push({ collection: collectionName, status: 'failed', previousDimensions: 0, newDimensions: targetDimensions, total: 0, processed: 0, failed: 0, error: message, }); } } // Build summary const summary = { totalCollections: results.length, migrated: results.filter(r => r.status === 'migrated').length, skipped: results.filter(r => r.status === 'skipped').length, failed: results.filter(r => r.status === 'failed').length, }; logger.info('Embedding migration complete', { requestId, summary, }); return { success: true, data: { collections: results, summary, }, meta: { timestamp: new Date().toISOString(), requestId, version: 'v1', }, }; }