UNPKG

@xiaohui-wang/mcpadvisor

Version:

MCP Advisor & Installation - Find the right MCP server for your needs

132 lines (131 loc) 5.67 kB
/**
 * Vector search manager module.
 * Provides helper functions for indexing data into a vector engine and for querying it.
 */
import { getTextEmbedding } from '../../../utils/embedding.js';
import logger from '../../../utils/logger.js';

/** Number of results requested from the engine when the caller does not override it. */
const DEFAULT_RESULT_LIMIT = 5;

/** Similarity threshold used when the caller does not override it (kept low so searches return results). */
const DEFAULT_MIN_SIMILARITY = 0.3;

/** Returns a printable message for any thrown value. */
const toErrorMessage = (error) => (error instanceof Error ? error.message : String(error));

/** Returns the class name of a thrown Error, or the `typeof` of a non-Error value. */
const toErrorType = (error) => (error instanceof Error ? error.constructor.name : typeof error);

/** Lazily loads the data-update module; a dynamic import is used to avoid a circular dependency. */
const loadDataUpdateManager = () => import('../../providers/oceanbase/dataUpdateManager.js');

/**
 * Asks DataUpdateManager whether the vector data must be rebuilt.
 * Falls back to `true` (rebuild) when the check itself fails.
 *
 * @returns {Promise<boolean>} true when the index should be rebuilt.
 */
const isReindexRequired = async () => {
  try {
    const { DataUpdateManager, UpdateType } = await loadDataUpdateManager();
    // Second argument is the max age; presumably hours — confirm against DataUpdateManager.
    const outdated = await DataUpdateManager.needsUpdate(UpdateType.VECTOR_DATA, 1);
    if (!outdated) {
      logger.info('Vector data is still fresh, skipping indexing');
      return false;
    }
    logger.info('Vector data is outdated, starting reindexing');
    return true;
  } catch (error) {
    // If the freshness check cannot be performed, rebuilding is the safe default.
    logger.warn(`Could not check data update time, defaulting to update: ${toErrorMessage(error)}`);
    return true;
  }
};

/**
 * Converts a raw server entry into the MCPServerResponse shape.
 *
 * @param {string} key - Entry key; becomes the response `id`.
 * @param {object} server - Raw entry; `display_name`, `description` and `repository.url` are read.
 * @returns {{id: string, title: *, description: *, sourceUrl: *, similarity: number}}
 *   Response object. `sourceUrl` is undefined when the entry has no `repository`.
 */
export const convertToServerResponse = (key, server) => ({
  id: key,
  title: server.display_name,
  description: server.description,
  // Optional chaining: an entry without `repository` must not abort a whole indexing run.
  sourceUrl: server.repository?.url,
  similarity: 0, // Filled in at search time.
});

/**
 * Processes the data set and (re)builds the vector index.
 *
 * If the engine does not expose `addEntry` and `clear`, a warning is logged and indexing is skipped.
 * Unless `forceUpdate` is set, the index is rebuilt only when DataUpdateManager reports the
 * data as outdated (or when that check fails).
 *
 * @param {object} data - Map of key -> server entry to index.
 * @param {object} vectorEngine - Engine; must expose `addEntry(key, embedding, response)` and `clear()` to be indexed.
 * @param {function(object): string} createSearchableText - Builds the text to embed for one entry.
 * @param {boolean} [forceUpdate=false] - Rebuild without consulting the freshness check.
 * @returns {Promise<void>}
 * @throws Re-throws any error raised while embedding or writing to the engine, after logging it.
 */
export const processAndIndexData = async (data, vectorEngine, createSearchableText, forceUpdate = false) => {
  // Check whether the engine supports write operations.
  const writableEngine = vectorEngine;
  const supportsWriteOperations =
    typeof writableEngine.addEntry === 'function' && typeof writableEngine.clear === 'function';
  if (!supportsWriteOperations) {
    logger.warn('Vector engine does not support write operations, skipping indexing');
    return;
  }

  try {
    if (!forceUpdate && !(await isReindexRequired())) {
      return;
    }

    const entries = Object.entries(data);
    const startTime = Date.now();

    // Compute every embedding BEFORE clearing, so that a failure while embedding
    // leaves the existing index untouched instead of empty or half-built.
    // Requests are issued one at a time to avoid flooding the embedding backend.
    const prepared = [];
    for (const [key, server] of entries) {
      const searchableText = createSearchableText(server);
      const embedding = await getTextEmbedding(searchableText);
      prepared.push([key, embedding, convertToServerResponse(key, server)]);
    }

    // Replace the index contents.
    await writableEngine.clear();
    for (const [key, embedding, serverResponse] of prepared) {
      await writableEngine.addEntry(key, embedding, serverResponse);
    }

    const duration = (Date.now() - startTime) / 1000;
    logger.info(`Indexed ${entries.length} MCP servers in ${duration.toFixed(2)} seconds`);

    // Record the refresh time; best-effort, a failure here must not fail the indexing run.
    try {
      const { DataUpdateManager, UpdateType } = await loadDataUpdateManager();
      await DataUpdateManager.updateLastUpdateTime(UpdateType.VECTOR_DATA);
      logger.info('Updated vector data timestamp');
    } catch (error) {
      logger.warn(`Could not update data timestamp: ${toErrorMessage(error)}`);
    }
  } catch (error) {
    // Log with the full error object attached. The context is built defensively so that
    // the handler itself cannot throw (e.g. when `data` is null) and mask the real error.
    logger.error(`Error indexing data: ${toErrorMessage(error)}`, {
      error,
      data: {
        dataSize: data == null ? 0 : Object.keys(data).length,
        engineType: vectorEngine.constructor?.name,
        forceUpdate,
        errorType: toErrorType(error),
      },
    });
    throw error;
  }
};

/**
 * Runs a vector search, passing both the query embedding and the raw query text to the engine.
 *
 * @param {string} query - User query; embedded and also forwarded as `textQuery`.
 * @param {object} vectorEngine - Engine; must expose `search(embedding, limit, options)`.
 * @param {object} [options] - Optional overrides.
 * @param {number} [options.limit=5] - Result count passed to the engine.
 * @param {number} [options.minSimilarity=0.3] - Similarity threshold passed to the engine.
 * @returns {Promise<Array>} Whatever the engine's `search` resolves to.
 * @throws Re-throws any error from embedding or searching, after logging it.
 */
export const performVectorSearch = async (query, vectorEngine, options = {}) => {
  const { limit = DEFAULT_RESULT_LIMIT, minSimilarity = DEFAULT_MIN_SIMILARITY } = options;
  try {
    const queryEmbedding = await getTextEmbedding(query);
    const searchOptions = {
      minSimilarity,
      textQuery: query, // The original query doubles as the text query.
    };
    const results = await vectorEngine.search(queryEmbedding, limit, searchOptions);
    logger.debug(`Found ${results.length} results from hybrid vector search`);
    return results;
  } catch (error) {
    // Log with the full error object attached; report the threshold actually used.
    logger.error(`Error in vector search: ${toErrorMessage(error)}`, {
      error,
      data: {
        query,
        minSimilarity,
        useTextQuery: true,
        errorType: toErrorType(error),
      },
    });
    throw error;
  }
};