@ever_cheng/memory-task-mcp
Version:
Memory and task management MCP Server
588 lines • 22.1 kB
JavaScript
"use strict";
/**
* Embedding Service for MemTask
*
* Provides multilingual-e5 embedding generation (default model: intfloat/multilingual-e5-base) with caching and batch processing.
* Optimized for semantic search and similarity calculations.
*/
// TypeScript-emitted interop helper; an implementation already present on `this` is reused.
// Exposes property `k` of module `m` on object `o` under the name `k2` (defaults to `k`).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Fall back to a live getter when `m` has no own descriptor for `k`, when the descriptor is an
// accessor but `m` is not flagged `__esModule`, or when it is a writable/configurable data property.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Variant used when Object.create is unavailable: plain value copy (not a live binding).
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// TypeScript-emitted interop helper; an implementation already present on `this` is reused.
// Sets `o.default` to `v`: via an enumerable defineProperty when Object.create exists,
// otherwise by plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
o["default"] = v;
});
// TypeScript-emitted interop helper; an implementation already present on `this` is reused.
// Builds a namespace-style object for `import * as x` from a required module.
var __importStar = (this && this.__importStar) || (function () {
// `ownKeys` replaces itself on first call with Object.getOwnPropertyNames, or with a
// for-in + hasOwnProperty fallback when that function is unavailable.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
// Modules flagged `__esModule` are returned unchanged.
if (mod && mod.__esModule) return mod;
var result = {};
// Bind every own key except "default" onto the result, then set `default` to the module itself.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// TypeScript-emitted interop helper; an implementation already present on `this` is reused.
// Returns `mod` unchanged when it is flagged `__esModule`; otherwise wraps it as `{ default: mod }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.EmbeddingService = exports.DEFAULT_EMBEDDING_CONFIG = void 0;
const logger_1 = __importDefault(require("./logger"));
const path = __importStar(require("path"));
const fs = __importStar(require("fs"));
// @xenova/transformers is imported dynamically to avoid ES module problems.
// This binding stays undefined until EmbeddingService._initializeModel assigns
// `transformers.pipeline` to it.
let pipeline;
/**
 * Dynamically loads the @xenova/transformers module and configures its `env`.
 *
 * The import goes through a Function constructor so that the TypeScript
 * compiler does not rewrite the dynamic `import()` call.
 *
 * @returns {Promise<object|null>} The loaded module, or `null` when the module
 *   exposes `env` but no models directory could be resolved.
 */
async function loadTransformers() {
    const importModule = new Function('specifier', 'return import(specifier)');
    const transformers = await importModule('@xenova/transformers');
    const runtimeEnv = transformers.env;
    // Nothing to configure when the module exposes no env object.
    if (!runtimeEnv) {
        return transformers;
    }
    // Adjust ONNX runtime settings to avoid worker path problems:
    // single thread, no SIMD, no proxy, no custom wasm paths.
    Object.assign(runtimeEnv.backends.onnx.wasm, {
        numThreads: 1,
        simd: false,
        proxy: false,
        wasmPaths: undefined
    });
    // Switch off the browser cache and web worker flags.
    Object.assign(runtimeEnv, {
        useBrowserCache: false,
        useWebWorkers: false
    });
    // Models directory: the environment variable wins, otherwise search known locations.
    const resolvedModelsDir = process.env.EMBEDDING_MODELS_PATH || findModelsPath();
    if (!resolvedModelsDir) {
        logger_1.default.warn('No embedding models found, embedding functionality will be disabled');
        // Returning null disables embeddings without aborting the program.
        return null;
    }
    logger_1.default.info(`Using embedding models path: ${resolvedModelsDir}`);
    // Point both cache and local model lookup at the directory and forbid
    // remote models so nothing is downloaded over the network.
    Object.assign(runtimeEnv, {
        cacheDir: resolvedModelsDir,
        localModelPath: resolvedModelsDir,
        allowRemoteModels: false,
        allowLocalModels: true
    });
    return transformers;
}
/**
 * Searches a fixed list of candidate locations for a models directory.
 *
 * Candidates are tried in priority order; the first one that exists and for
 * which `checkForValidModels` returns true is returned.
 *
 * @returns {string|null} Path of the first usable models directory, or `null`
 *   when none of the candidates qualifies.
 */
function findModelsPath() {
    const cwd = process.cwd();
    const homeDir = process.env.HOME;
    // Candidate locations, highest priority first.
    const candidatePaths = [
        cwd + '/models', // current working directory
        cwd + '/MemTask/models', // MemTask under the current working directory
        path.resolve(cwd, '../models'), // parent directory
        path.resolve(cwd, '../MemTask/models'), // MemTask under the parent directory
        path.resolve(__dirname, '../../models'), // relative to the compiled file (fallback)
        '/opt/models' // system-level location
    ];
    // Only add the per-user location when HOME is set; concatenating an unset
    // HOME would produce the bogus relative path "undefined/.local/share/models".
    if (homeDir) {
        candidatePaths.push(homeDir + '/.local/share/models');
    }
    for (const modelsPath of candidatePaths) {
        try {
            if (!fs.existsSync(modelsPath)) {
                continue;
            }
            if (checkForValidModels(modelsPath)) {
                logger_1.default.info(`Found models directory at: ${modelsPath}`);
                return modelsPath;
            }
        }
        catch (error) {
            // Best effort: a failure on one candidate must not stop the search,
            // but leave a trace instead of discarding the error silently.
            logger_1.default.debug(`Skipping models path candidate: ${modelsPath}`, { error: String(error) });
        }
    }
    logger_1.default.warn('No valid models directory found in any searched locations');
    return null;
}
/**
 * Reports whether `basePath` contains at least one usable model directory.
 *
 * A subdirectory counts as a model when it holds both `config.json` and
 * `onnx/model.onnx`. Subdirectories that are not models are searched
 * recursively, which covers nested layouts such as
 * `intfloat/multilingual-e5-base`.
 *
 * @param {string} basePath Directory to inspect.
 * @returns {boolean} `true` if a model was found; `false` otherwise, including
 *   when the directory cannot be read.
 */
function checkForValidModels(basePath) {
    try {
        return fs
            .readdirSync(basePath, { withFileTypes: true })
            .filter((dirent) => dirent.isDirectory())
            .some((dirent) => {
                const candidateDir = path.join(basePath, dirent.name);
                const looksLikeModel = fs.existsSync(path.join(candidateDir, 'config.json'))
                    && fs.existsSync(path.join(candidateDir, 'onnx', 'model.onnx'));
                // Not a model itself: descend and look for one further down.
                return looksLikeModel || checkForValidModels(candidateDir);
            });
    }
    catch (error) {
        return false;
    }
}
/**
 * Lists the models available in the local models directory.
 *
 * The directory comes from `EMBEDDING_MODELS_PATH` when set, otherwise from
 * `findModelsPath()`. A directory is reported as a model when it contains
 * both `config.json` and `onnx/model.onnx`; its name is the slash-joined path
 * relative to the models directory (e.g. `intfloat/multilingual-e5-base`).
 *
 * @returns {string[]} Model names found. Empty when no models directory
 *   exists; possibly partial when scanning fails midway (the error is logged).
 */
function getAvailableLocalModels() {
    const rootDir = process.env.EMBEDDING_MODELS_PATH || findModelsPath();
    if (!rootDir) {
        logger_1.default.warn('No embedding models directory found, no local models available');
        return [];
    }
    logger_1.default.debug(`Scanning for models in: ${rootDir}`);
    const discovered = [];
    // Depth-first walk; `namePrefix` carries the slash-joined ancestor names.
    const scanDirectory = (dir, namePrefix = '') => {
        if (!fs.existsSync(dir)) {
            return;
        }
        for (const dirent of fs.readdirSync(dir, { withFileTypes: true })) {
            if (!dirent.isDirectory()) {
                continue;
            }
            const childDir = path.join(dir, dirent.name);
            const qualifiedName = namePrefix ? `${namePrefix}/${dirent.name}` : dirent.name;
            const isModelDir = fs.existsSync(path.join(childDir, 'config.json'))
                && fs.existsSync(path.join(childDir, 'onnx', 'model.onnx'));
            if (isModelDir) {
                discovered.push(qualifiedName);
                continue;
            }
            // Not a model itself: keep looking in its subdirectories.
            scanDirectory(childDir, qualifiedName);
        }
    };
    try {
        scanDirectory(rootDir);
    }
    catch (error) {
        logger_1.default.warn('檢查本地模型時出錯:', error);
    }
    return discovered;
}
const cache_1 = require("./cache");
/**
* Default embedding configuration for multilingual-e5-base
* 使用 models 中可用的模型
*/
exports.DEFAULT_EMBEDDING_CONFIG = {
// 檢查環境變數,如果沒有則使用 models 中可用的模型
modelName: process.env.EMBEDDING_MODEL || 'intfloat/multilingual-e5-base',
dimension: 768, // multilingual-e5-base 的維度是 768
maxSequenceLength: 512,
batchSize: 32,
device: 'cpu',
quantized: false, // 改為 false 避免量化問題
cacheEnabled: true,
cacheSize: parseInt(process.env.EMBEDDING_CACHE_SIZE || '1000'),
cacheTtlMs: 24 * 60 * 60 * 1000 // 24 hours
};
/**
 * Embedding service.
 *
 * Lazily loads a local feature-extraction model through @xenova/transformers
 * and turns text into embedding vectors, with an optional result cache.
 * When the model cannot be loaded the service stays usable but returns empty
 * embeddings instead of throwing.
 */
class EmbeddingService {
    /**
     * @param {object} [config] Service configuration; defaults to
     *   `DEFAULT_EMBEDDING_CONFIG`. The object is shallow-copied, so the
     *   caller's object is never modified by this instance.
     */
    constructor(config = exports.DEFAULT_EMBEDDING_CONFIG) {
        this.model = null;
        this.cache = null;
        this.initialized = false;
        this.initializationPromise = null;
        // Copy rather than alias: _initializeModel writes the model name that was
        // actually loaded into this.config, and that write must not leak into the
        // shared default config or into the caller's object.
        this.config = { ...config };
        if (this.config.cacheEnabled) {
            this.cache = new cache_1.CacheService({
                maxSize: this.config.cacheSize,
                ttlMs: this.config.cacheTtlMs
            });
        }
    }
    /**
     * Initializes the embedding model once. Concurrent callers share the same
     * in-flight initialization promise.
     *
     * @returns {Promise<void>}
     */
    async initialize() {
        if (this.initialized) {
            return;
        }
        if (this.initializationPromise) {
            return this.initializationPromise;
        }
        this.initializationPromise = this._initializeModel();
        await this.initializationPromise;
    }
    /**
     * Loads the model. Load failures are logged and leave `this.model` as
     * `null` with `initialized` set to true (embeddings disabled).
     *
     * @returns {Promise<void>}
     */
    async _initializeModel() {
        // Discover which models exist locally.
        const availableModels = getAvailableLocalModels();
        logger_1.default.debug(`本地可用模型:`, { availableModels });
        // Fall back to the first local model when the configured one is missing.
        let modelToUse = this.config.modelName;
        try {
            if (!availableModels.includes(this.config.modelName) && availableModels.length > 0) {
                modelToUse = availableModels[0];
                logger_1.default.warn(`配置的模型 ${this.config.modelName} 不可用,使用 ${modelToUse}`, { configuredModel: this.config.modelName, usedModel: modelToUse });
            }
            logger_1.default.info(`正在載入 embedding 模型: ${modelToUse}`, { model: modelToUse });
            // Dynamically import @xenova/transformers.
            const transformers = await loadTransformers();
            if (!transformers) {
                throw new Error('Transformers not available, embedding models path not found');
            }
            pipeline = transformers.pipeline;
            // First attempt: the configured options.
            try {
                // Environment variable first, otherwise search known locations.
                const modelsDir = process.env.EMBEDDING_MODELS_PATH || findModelsPath();
                if (!modelsDir) {
                    throw new Error('No embedding models directory found');
                }
                logger_1.default.info(`Loading embedding model from: ${modelsDir}`);
                this.model = await pipeline('feature-extraction', modelToUse, {
                    quantized: this.config.quantized,
                    revision: 'main',
                    cache_dir: modelsDir,
                    local_files_only: true, // only use local files
                    use_external_data_format: false,
                    providers: ['cpu']
                });
            }
            catch (primaryError) {
                logger_1.default.warn(`主要配置失敗,嘗試備用配置: ${primaryError}`, { model: modelToUse });
                // Second attempt: a more conservative set of options.
                const modelsDir = process.env.EMBEDDING_MODELS_PATH || findModelsPath();
                if (!modelsDir) {
                    throw new Error('No embedding models directory found');
                }
                this.model = await pipeline('feature-extraction', modelToUse, {
                    quantized: false,
                    revision: 'main',
                    cache_dir: modelsDir,
                    local_files_only: true, // only use local files
                    execution_providers: ['CPUExecutionProvider'],
                    inter_op_num_threads: 1,
                    intra_op_num_threads: 1,
                    use_external_data_format: false,
                    providers: ['cpu']
                });
            }
            this.initialized = true;
            // Record the model that was actually loaded.
            this.config.modelName = modelToUse;
            logger_1.default.info(`✅ Embedding 模型載入成功: ${modelToUse}`, { model: modelToUse });
        }
        catch (error) {
            logger_1.default.error(`❌ Embedding 模型載入失敗`, error, { model: modelToUse });
            logger_1.default.warn('Embedding functionality will be disabled due to model initialization failure');
            this.model = null;
            // Marked initialized so the load is not retried; embeddings stay disabled.
            this.initialized = true;
        }
    }
    /**
     * Generates the embedding for a single text.
     *
     * @param {string} text Text to embed; it is also the cache key.
     * @returns {Promise<{embedding: number[], text: string, model: string, dimension: number, processingTime: number}>}
     *   `embedding` is empty and `dimension` is 0 when no model is available.
     * @throws {Error} When the model call itself fails.
     */
    async generateEmbedding(text) {
        const startTime = Date.now();
        // Serve from the cache when possible.
        if (this.cache) {
            const cached = this.cache.get(text);
            if (cached) {
                return {
                    embedding: cached,
                    text,
                    model: this.config.modelName,
                    // Report the real vector length, as the uncached path does; the
                    // configured dimension can differ when a fallback model was loaded.
                    dimension: cached.length,
                    processingTime: Date.now() - startTime
                };
            }
        }
        await this.initialize();
        if (!this.model) {
            logger_1.default.warn('Embedding model not available, returning empty embedding');
            return {
                embedding: [],
                text,
                model: this.config.modelName,
                dimension: 0,
                processingTime: 0
            };
        }
        try {
            // E5 models expect a prefix on the input text.
            const prefixedText = this.addE5Prefix(text);
            // Cut over-long input.
            const truncatedText = this.truncateText(prefixedText);
            const result = await this.model(truncatedText, {
                pooling: 'mean',
                normalize: true
            });
            // Convert the model output into a plain array of numbers.
            const embedding = Array.from(result.data.flat ? result.data.flat() : result.data);
            if (this.cache) {
                this.cache.set(text, embedding);
            }
            const processingTime = Date.now() - startTime;
            return {
                embedding,
                text,
                model: this.config.modelName,
                dimension: embedding.length,
                processingTime
            };
        }
        catch (error) {
            logger_1.default.error('生成 embedding 失敗', error, { text });
            throw new Error(`Failed to generate embedding: ${error}`, { cause: error });
        }
    }
    /**
     * Generates embeddings for many texts, reusing cached vectors and
     * processing the remainder in chunks of `config.batchSize`.
     *
     * @param {string[]} texts Texts to embed.
     * @returns {Promise<{embeddings: number[][], texts: string[], model: string, dimension: number, totalProcessingTime: number, averageProcessingTime: number}>}
     *   `embeddings[i]` corresponds to `texts[i]`; every entry is an empty
     *   array when no model is available.
     */
    async batchGenerateEmbeddings(texts) {
        const startTime = Date.now();
        if (texts.length === 0) {
            return {
                embeddings: [],
                texts: [],
                model: this.config.modelName,
                dimension: this.config.dimension,
                totalProcessingTime: 0,
                averageProcessingTime: 0
            };
        }
        // Load the model before testing for it; otherwise a batch call made
        // before any other call would always report the model as unavailable.
        await this.initialize();
        if (!this.model) {
            logger_1.default.warn('Embedding model not available, returning empty embeddings for batch');
            return {
                embeddings: texts.map(() => []),
                texts: texts,
                model: this.config.modelName,
                dimension: 0,
                totalProcessingTime: 0,
                averageProcessingTime: 0
            };
        }
        // Split the input into cache hits and work still to be done, remembering
        // each pending text's position so results land in input order.
        const results = [];
        const pending = [];
        for (let i = 0; i < texts.length; i++) {
            const text = texts[i];
            const cached = this.cache?.get(text);
            if (cached) {
                results[i] = cached;
            }
            else {
                pending.push({ index: i, text });
            }
        }
        for (const batch of this.createBatches(pending, this.config.batchSize)) {
            const generated = await Promise.all(batch.map((item) => this.generateEmbedding(item.text)));
            for (let i = 0; i < batch.length; i++) {
                results[batch[i].index] = generated[i].embedding;
            }
        }
        const totalProcessingTime = Date.now() - startTime;
        return {
            embeddings: results,
            texts,
            model: this.config.modelName,
            dimension: this.config.dimension,
            totalProcessingTime,
            averageProcessingTime: totalProcessingTime / texts.length
        };
    }
    /**
     * Lists the models available locally.
     *
     * @returns {string[]}
     */
    static getAvailableModels() {
        return getAvailableLocalModels();
    }
    /**
     * Computes the cosine similarity of two embeddings.
     *
     * @param {number[]} a
     * @param {number[]} b
     * @returns {number} The similarity, or 0 when either vector has zero norm.
     * @throws {Error} When the vectors differ in length.
     */
    static cosineSimilarity(a, b) {
        if (a.length !== b.length) {
            throw new Error('Embedding dimensions must match');
        }
        let dotProduct = 0;
        let normA = 0;
        let normB = 0;
        for (let i = 0; i < a.length; i++) {
            dotProduct += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        if (normA === 0 || normB === 0) {
            return 0;
        }
        return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
    }
    /**
     * Adds the E5 prefix: `query: ` for queries, `passage: ` for documents.
     *
     * @param {string} text
     * @param {boolean} [isQuery=true]
     * @returns {string} `text` unchanged when it already starts with the chosen prefix.
     */
    addE5Prefix(text, isQuery = true) {
        const prefix = isQuery ? 'query: ' : 'passage: ';
        // Do not add the same prefix twice.
        if (text.startsWith(prefix)) {
            return text;
        }
        return prefix + text;
    }
    /**
     * Truncates over-long text by character count.
     *
     * The limit is `maxSequenceLength * 2` characters, a rough estimate; no
     * tokenizer is involved. Truncated text gets `...` appended.
     *
     * @param {string} text
     * @returns {string}
     */
    truncateText(text) {
        const maxChars = this.config.maxSequenceLength * 2;
        if (text.length <= maxChars) {
            return text;
        }
        return text.substring(0, maxChars) + '...';
    }
    /**
     * Splits an array into consecutive chunks.
     *
     * @param {Array} array
     * @param {number} batchSize Chunk size; values that are not a number of at
     *   least 1 are treated as 1 so the loop always advances.
     * @returns {Array[]}
     */
    createBatches(array, batchSize) {
        // A step of 0 would never terminate and NaN would drop elements, so clamp.
        const step = Math.max(1, Math.floor(batchSize) || 1);
        const batches = [];
        for (let i = 0; i < array.length; i += step) {
            batches.push(array.slice(i, i + step));
        }
        return batches;
    }
    /**
     * Merges `newConfig` into the current configuration.
     *
     * The cache is rebuilt (or dropped) when a cache-related key is present.
     * An already loaded model is left untouched by this method.
     *
     * @param {object} newConfig Partial configuration.
     */
    updateConfig(newConfig) {
        this.config = { ...this.config, ...newConfig };
        if ('cacheEnabled' in newConfig || 'cacheSize' in newConfig || 'cacheTtlMs' in newConfig) {
            if (this.config.cacheEnabled) {
                this.cache = new cache_1.CacheService({
                    maxSize: this.config.cacheSize,
                    ttlMs: this.config.cacheTtlMs
                });
            }
            else {
                this.cache = null;
            }
        }
    }
    /**
     * Returns a shallow copy of the current configuration.
     *
     * @returns {object}
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Returns model name, configured dimension, initialization flag and the
     * cache statistics (undefined when there is no cache).
     *
     * @returns {{modelName: string, dimension: number, initialized: boolean, cacheStats: *}}
     */
    getModelInfo() {
        return {
            modelName: this.config.modelName,
            dimension: this.config.dimension,
            initialized: this.initialized,
            cacheStats: this.cache?.getStats()
        };
    }
    /**
     * Releases cache and model and resets the initialization state
     * (implements the Disposable interface).
     *
     * @returns {Promise<void>}
     * @throws Rethrows any error raised while disposing, after logging it.
     */
    async dispose() {
        try {
            if (this.cache) {
                // Call the cache's own dispose method when it has one.
                if (typeof this.cache.dispose === 'function') {
                    await this.cache.dispose();
                }
                this.cache = null;
            }
            if (this.model) {
                // Call whichever cleanup method the model provides, if any.
                if (typeof this.model.dispose === 'function') {
                    await this.model.dispose();
                }
                else if (typeof this.model.cleanup === 'function') {
                    await this.model.cleanup();
                }
                this.model = null;
            }
            this.initialized = false;
            this.initializationPromise = null;
            logger_1.default.info('✅ EmbeddingService disposed successfully');
        }
        catch (error) {
            logger_1.default.error('❌ Error disposing EmbeddingService', error);
            throw error;
        }
    }
    /**
     * Releases resources (kept for backward compatibility).
     * @deprecated Use dispose() instead
     */
    async cleanup() {
        await this.dispose();
    }
    /**
     * Health check: initializes the service and generates a test embedding.
     *
     * `status` is 'unhealthy' only when one of those steps throws. Because
     * generateEmbedding does not throw when no model is loaded, `status` can
     * be 'healthy' while `modelLoaded` is false.
     *
     * @returns {Promise<{status: string, initialized: boolean, modelLoaded: boolean, error?: string}>}
     */
    async healthCheck() {
        try {
            await this.initialize();
            await this.generateEmbedding('test');
            return {
                status: 'healthy',
                initialized: this.initialized,
                modelLoaded: this.model !== null
            };
        }
        catch (error) {
            return {
                status: 'unhealthy',
                initialized: this.initialized,
                modelLoaded: this.model !== null,
                error: String(error)
            };
        }
    }
}
exports.EmbeddingService = EmbeddingService;
//# sourceMappingURL=embedding.js.map