UNPKG

@ever_cheng/memory-task-mcp

Version:

Memory and task management MCP Server

588 lines 22.1 kB
"use strict"; /** * Embedding Service for MemTask * * Provides multilingual-e5-large embedding generation with caching and batch processing. * Optimized for semantic search and similarity calculations. */ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.EmbeddingService = exports.DEFAULT_EMBEDDING_CONFIG = void 0; const logger_1 = __importDefault(require("./logger")); const path = __importStar(require("path")); const fs = __importStar(require("fs")); // 動態導入 @xenova/transformers 以避免 ES module 問題 let pipeline; /** * 動態加載 transformers 模塊 * 使用 Function constructor 避免 TypeScript 編譯時問題 */ async function loadTransformers() { const dynamicImport = new Function('specifier', 'return import(specifier)'); const transformers = await dynamicImport('@xenova/transformers'); // 配置 ONNX runtime 環境變量以避免 worker 路徑問題 const { env } = transformers; if (env) { // 完全禁用 web workers 和多線程,使用同步執行 env.backends.onnx.wasm.numThreads = 1; env.backends.onnx.wasm.simd = false; env.backends.onnx.wasm.proxy = false; env.backends.onnx.wasm.wasmPaths = undefined; // 禁用所有 worker 相關功能 env.useBrowserCache = false; env.useWebWorkers = false; // 設置正確的 models 目錄路徑 - 優先使用環境變數,否則智慧搜尋 const modelsDir = process.env.EMBEDDING_MODELS_PATH || findModelsPath(); if (!modelsDir) { logger_1.default.warn('No embedding models found, embedding functionality will be disabled'); return null; // 讓 embedding 功能失效但不中斷程式 } logger_1.default.info(`Using embedding models path: ${modelsDir}`); env.cacheDir = modelsDir; env.localModelPath = modelsDir; env.allowRemoteModels = false; // 強制使用本地模型避免網絡下載 env.allowLocalModels = true; } return transformers; } /** * 智慧搜尋模型目錄路徑 */ function findModelsPath() { // 可能的模型路徑位置(按優先順序) const possiblePaths = [ process.cwd() + '/models', // 當前執行目錄 process.cwd() + '/MemTask/models', // 當前執行目錄下的 MemTask path.resolve(process.cwd(), '../models'), // 父目錄 path.resolve(process.cwd(), '../MemTask/models'), // 父目錄下的 MemTask path.resolve(__dirname, '../../models'), // 編譯後的相對路徑(備用) '/opt/models', // 系統級路徑 process.env.HOME + '/.local/share/models' // 用戶級路徑 ]; for (const modelsPath of possiblePaths) { try { if (!fs.existsSync(modelsPath)) continue; // 檢查是否有任何有效的模型 const hasValidModel = checkForValidModels(modelsPath); if (hasValidModel) { logger_1.default.info(`Found models directory at: ${modelsPath}`); return modelsPath; } } catch (error) { // 忽略單個路徑的錯誤,繼續搜尋 continue; } } logger_1.default.warn('No valid models directory found in any searched locations'); return null; } /** * 檢查目錄下是否有有效的模型 */ function checkForValidModels(basePath) { try { const entries = fs.readdirSync(basePath, { withFileTypes: true }); for (const entry of entries) { if (entry.isDirectory()) { const modelPath = path.join(basePath, entry.name); // 檢查是否有 config.json 和 onnx/model.onnx const configPath = path.join(modelPath, 'config.json'); const onnxPath = path.join(modelPath, 'onnx', 'model.onnx'); if (fs.existsSync(configPath) && fs.existsSync(onnxPath)) { return true; // 找到至少一個有效模型 } else { // 遞迴檢查子目錄(處理 intfloat/multilingual-e5-base 這種結構) if (checkForValidModels(modelPath)) { return true; } } } } return false; } catch (error) { return false; } } /** * 檢查本地可用的模型 */ function getAvailableLocalModels() { // 優先使用環境變數,否則智慧搜尋 const modelsDir = process.env.EMBEDDING_MODELS_PATH || findModelsPath(); if (!modelsDir) { logger_1.default.warn('No embedding models directory found, no local models available'); return []; } logger_1.default.debug(`Scanning for models in: ${modelsDir}`); const availableModels = []; try { // 檢查 models 目錄下的模型 const checkModelDir = (basePath, prefix = '') => { if (!fs.existsSync(basePath)) return; const entries = fs.readdirSync(basePath, { withFileTypes: true }); for (const entry of entries) { if (entry.isDirectory()) { const modelPath = path.join(basePath, entry.name); const fullModelName = prefix ? `${prefix}/${entry.name}` : entry.name; // 檢查是否有 config.json 和 onnx/model.onnx const configPath = path.join(modelPath, 'config.json'); const onnxPath = path.join(modelPath, 'onnx', 'model.onnx'); if (fs.existsSync(configPath) && fs.existsSync(onnxPath)) { availableModels.push(fullModelName); } else { // 繼續往下檢查子目錄 checkModelDir(modelPath, fullModelName); } } } }; // 檢查 models 目錄下的模型 checkModelDir(modelsDir); } catch (error) { logger_1.default.warn('檢查本地模型時出錯:', error); } return availableModels; } const cache_1 = require("./cache"); /** * Default embedding configuration for multilingual-e5-base * 使用 models 中可用的模型 */ exports.DEFAULT_EMBEDDING_CONFIG = { // 檢查環境變數,如果沒有則使用 models 中可用的模型 modelName: process.env.EMBEDDING_MODEL || 'intfloat/multilingual-e5-base', dimension: 768, // multilingual-e5-base 的維度是 768 maxSequenceLength: 512, batchSize: 32, device: 'cpu', quantized: false, // 改為 false 避免量化問題 cacheEnabled: true, cacheSize: parseInt(process.env.EMBEDDING_CACHE_SIZE || '1000'), cacheTtlMs: 24 * 60 * 60 * 1000 // 24 hours }; /** * Embedding Service Class */ class EmbeddingService { constructor(config = exports.DEFAULT_EMBEDDING_CONFIG) { this.model = null; this.cache = null; this.initialized = false; this.initializationPromise = null; this.config = config; if (this.config.cacheEnabled) { this.cache = new cache_1.CacheService({ maxSize: this.config.cacheSize, ttlMs: this.config.cacheTtlMs }); } } /** * 初始化 embedding 模型 */ async initialize() { if (this.initialized) return; if (this.initializationPromise) { return this.initializationPromise; } this.initializationPromise = this._initializeModel(); await this.initializationPromise; } async _initializeModel() { // 檢查本地可用模型 const availableModels = getAvailableLocalModels(); logger_1.default.debug(`本地可用模型:`, { availableModels }); // 如果配置的模型不可用,使用第一個可用的模型 let modelToUse = this.config.modelName; try { if (!availableModels.includes(this.config.modelName) && availableModels.length > 0) { modelToUse = availableModels[0]; logger_1.default.warn(`配置的模型 ${this.config.modelName} 不可用,使用 ${modelToUse}`, { configuredModel: this.config.modelName, usedModel: modelToUse }); } logger_1.default.info(`正在載入 embedding 模型: ${modelToUse}`, { model: modelToUse }); // 動態導入 @xenova/transformers const transformers = await loadTransformers(); if (!transformers) { throw new Error('Transformers not available, embedding models path not found'); } pipeline = transformers.pipeline; // 嘗試使用主要配置 try { // 優先使用環境變數,否則智慧搜尋 const modelsDir = process.env.EMBEDDING_MODELS_PATH || findModelsPath(); if (!modelsDir) { throw new Error('No embedding models directory found'); } logger_1.default.info(`Loading embedding model from: ${modelsDir}`); this.model = await pipeline('feature-extraction', modelToUse, { quantized: this.config.quantized, revision: 'main', // Node.js 兼容配置 - 使用絕對路徑 cache_dir: modelsDir, local_files_only: true, // 強制使用本地文件 use_external_data_format: false, // 強制使用 CPU 執行提供者 providers: ['cpu'] }); } catch (primaryError) { logger_1.default.warn(`主要配置失敗,嘗試備用配置: ${primaryError}`, { model: modelToUse }); // 使用更保守的配置作為備用方案 // 優先使用環境變數,否則智慧搜尋 const modelsDir = process.env.EMBEDDING_MODELS_PATH || findModelsPath(); if (!modelsDir) { throw new Error('No embedding models directory found'); } this.model = await pipeline('feature-extraction', modelToUse, { quantized: false, revision: 'main', // 使用絕對路徑 cache_dir: modelsDir, local_files_only: true, // 強制使用本地文件 // 強制使用 Node.js 兼容模式 execution_providers: ['CPUExecutionProvider'], inter_op_num_threads: 1, intra_op_num_threads: 1, // 禁用所有 web 功能 use_external_data_format: false, providers: ['cpu'] }); } this.initialized = true; // 更新配置中的模型名稱為實際使用的模型 this.config.modelName = modelToUse; logger_1.default.info(`✅ Embedding 模型載入成功: ${modelToUse}`, { model: modelToUse }); } catch (error) { logger_1.default.error(`❌ Embedding 模型載入失敗`, error, { model: modelToUse }); logger_1.default.warn('Embedding functionality will be disabled due to model initialization failure'); this.model = null; this.initialized = true; // 標記為已初始化但功能失效 } } /** * 生成單個文本的 embedding */ async generateEmbedding(text) { const startTime = Date.now(); // 檢查緩存 if (this.cache) { const cached = this.cache.get(text); if (cached) { return { embedding: cached, text, model: this.config.modelName, dimension: this.config.dimension, processingTime: Date.now() - startTime }; } } await this.initialize(); if (!this.model) { logger_1.default.warn('Embedding model not available, returning empty embedding'); return { embedding: [], text, model: this.config.modelName, dimension: 0, processingTime: 0 }; } try { // E5 模型需要特殊的查詢前綴 const prefixedText = this.addE5Prefix(text); // 截斷過長的文本 const truncatedText = this.truncateText(prefixedText); // 生成 embedding const result = await this.model(truncatedText, { pooling: 'mean', normalize: true }); // 提取 embedding 數組 const embedding = Array.from(result.data.flat ? result.data.flat() : result.data); // 緩存結果 if (this.cache) { this.cache.set(text, embedding); } const processingTime = Date.now() - startTime; return { embedding, text, model: this.config.modelName, dimension: embedding.length, processingTime }; } catch (error) { logger_1.default.error('生成 embedding 失敗', error, { text }); throw new Error(`Failed to generate embedding: ${error}`); } } /** * 批量生成 embeddings */ async batchGenerateEmbeddings(texts) { const startTime = Date.now(); if (texts.length === 0) { return { embeddings: [], texts: [], model: this.config.modelName, dimension: this.config.dimension, totalProcessingTime: 0, averageProcessingTime: 0 }; } if (!this.model) { logger_1.default.warn('Embedding model not available, returning empty embeddings for batch'); return { embeddings: texts.map(() => []), texts: texts, model: this.config.modelName, dimension: 0, totalProcessingTime: 0, averageProcessingTime: 0 }; } // 檢查緩存 const results = []; const uncachedTexts = []; const uncachedIndices = []; for (let i = 0; i < texts.length; i++) { const text = texts[i]; const cached = this.cache?.get(text); if (cached) { results[i] = cached; } else { uncachedTexts.push(text); uncachedIndices.push(i); } } // 處理未緩存的文本 if (uncachedTexts.length > 0) { const batches = this.createBatches(uncachedTexts, this.config.batchSize); for (const batch of batches) { const batchResults = await Promise.all(batch.map(text => this.generateEmbedding(text))); // 將結果放回正確的位置 for (let i = 0; i < batch.length; i++) { const originalIndex = uncachedIndices.shift(); results[originalIndex] = batchResults[i].embedding; } } } const totalProcessingTime = Date.now() - startTime; return { embeddings: results, texts, model: this.config.modelName, dimension: this.config.dimension, totalProcessingTime, averageProcessingTime: totalProcessingTime / texts.length }; } /** * 獲取本地可用的模型列表 */ static getAvailableModels() { return getAvailableLocalModels(); } /** * 計算兩個 embedding 之間的餘弦相似度 */ static cosineSimilarity(a, b) { if (a.length !== b.length) { throw new Error('Embedding dimensions must match'); } let dotProduct = 0; let normA = 0; let normB = 0; for (let i = 0; i < a.length; i++) { dotProduct += a[i] * b[i]; normA += a[i] * a[i]; normB += b[i] * b[i]; } if (normA === 0 || normB === 0) { return 0; } return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB)); } /** * 為 E5 模型添加適當的前綴 */ addE5Prefix(text, isQuery = true) { // E5 模型對查詢和文檔使用不同前綴 const prefix = isQuery ? 'query: ' : 'passage: '; // 避免重複添加前綴 if (text.startsWith(prefix)) { return text; } return prefix + text; } /** * 截斷過長的文本 */ truncateText(text) { // 簡單的字符長度截斷,實際應該使用 tokenizer const maxChars = this.config.maxSequenceLength * 2; // 粗略估算 if (text.length <= maxChars) { return text; } return text.substring(0, maxChars) + '...'; } /** * 將數組分批 */ createBatches(array, batchSize) { const batches = []; for (let i = 0; i < array.length; i += batchSize) { batches.push(array.slice(i, i + batchSize)); } return batches; } /** * 更新配置 */ updateConfig(newConfig) { this.config = { ...this.config, ...newConfig }; // 如果快取設定改變,重新初始化快取 if ('cacheEnabled' in newConfig || 'cacheSize' in newConfig || 'cacheTtlMs' in newConfig) { if (this.config.cacheEnabled) { this.cache = new cache_1.CacheService({ maxSize: this.config.cacheSize, ttlMs: this.config.cacheTtlMs }); } else { this.cache = null; } } } /** * 獲取當前配置 */ getConfig() { return { ...this.config }; } /** * 獲取模型信息 */ getModelInfo() { return { modelName: this.config.modelName, dimension: this.config.dimension, initialized: this.initialized, cacheStats: this.cache?.getStats() }; } /** * 清理資源 (實現 Disposable 介面) */ async dispose() { try { // 清理快取資源 if (this.cache) { // If cache has its own dispose method, call it if (typeof this.cache.dispose === 'function') { await this.cache.dispose(); } this.cache = null; } // 清理模型資源 if (this.model) { // If model has cleanup method, call it if (typeof this.model.dispose === 'function') { await this.model.dispose(); } else if (typeof this.model.cleanup === 'function') { await this.model.cleanup(); } this.model = null; } // 重置狀態 this.initialized = false; this.initializationPromise = null; logger_1.default.info('✅ EmbeddingService disposed successfully'); } catch (error) { logger_1.default.error('❌ Error disposing EmbeddingService', error); throw error; } } /** * 清理資源 (向後兼容) * @deprecated Use dispose() instead */ async cleanup() { await this.dispose(); } /** * 健康檢查 */ async healthCheck() { try { await this.initialize(); // 測試生成一個簡單的 embedding await this.generateEmbedding('test'); return { status: 'healthy', initialized: this.initialized, modelLoaded: this.model !== null }; } catch (error) { return { status: 'unhealthy', initialized: this.initialized, modelLoaded: this.model !== null, error: String(error) }; } } } exports.EmbeddingService = EmbeddingService; //# sourceMappingURL=embedding.js.map