UNPKG

ai-knowledge

Version:

ai-knowledge

200 lines (195 loc) 7.11 kB
const { RAGApplicationBuilder } = require('@llm-tools/embedjs');
const { LibSqlDb } = require('@llm-tools/embedjs-libsql');
// Alternative local embedding implementation, currently disabled:
// const { BGEM3Embeddings } = require('./embeddings');
const { OpenAiEmbeddings } = require('@llm-tools/embedjs-openai')
const { loadUrl, loadFile, loadDirectory } = require('./loaders');
const { generateHtmlReport } = require('./reporters');
const { ensureDir } = require('./utils');
const path = require('path');
const config = require('./config');
const { loadText } = require('./loaders/textLoader');
const fs = require('fs');

/**
 * A named knowledge base backed by a LibSQL vector store and an
 * OpenAI-compatible embedding endpoint.
 *
 * Instances are normally obtained through {@link KnowledgeBase.getInstance},
 * which caches one initialized instance per name. Constructing an instance
 * directly is also supported; it is initialized lazily on first use.
 */
class KnowledgeBase {
  /**
   * Cache of fully initialized instances, keyed by knowledge base name.
   * @type {Map<string, KnowledgeBase>}
   */
  static instances = new Map();

  /**
   * In-flight `getInstance` initializations, keyed by name. Concurrent callers
   * share one promise instead of each building their own instance.
   * @type {Map<string, Promise<KnowledgeBase>>}
   */
  static #pending = new Map();

  /**
   * In-flight `initialize()` call for this instance, or null when idle.
   * @type {Promise<void> | null}
   */
  #initPromise = null;

  /**
   * Check whether a knowledge base has already been persisted.
   *
   * @param {string} name Knowledge base name.
   * @returns {boolean} True when the database path for `name` exists on disk.
   */
  static exists(name) {
    const dbPath = path.join(config.storage.path, name);
    return fs.existsSync(dbPath);
  }

  /**
   * Get the shared, initialized instance for `name`, creating it on first use.
   *
   * Concurrent calls for the same name resolve to the same instance. A failed
   * initialization is not cached, so a later call will try again.
   *
   * @param {string} name Knowledge base name.
   * @returns {Promise<KnowledgeBase>} The initialized instance.
   * @throws Whatever `initialize()` rejects with.
   */
  static async getInstance(name) {
    if (this.instances.has(name)) {
      return this.instances.get(name);
    }

    let pending = KnowledgeBase.#pending.get(name);
    if (!pending) {
      pending = (async () => {
        const instance = new KnowledgeBase(name);
        await instance.initialize();
        // Only publish to the cache once initialization has succeeded.
        this.instances.set(name, instance);
        return instance;
      })().finally(() => {
        KnowledgeBase.#pending.delete(name);
      });
      KnowledgeBase.#pending.set(name, pending);
    }
    return pending;
  }

  /**
   * Create an uninitialized knowledge base handle.
   *
   * @param {string} name Knowledge base name; also used as the database file
   *   name under the configured storage path.
   */
  constructor(name) {
    this.name = name;
    this.app = null;
    this.storagePath = path.join(config.storage.path, this.name);
    this.initialized = false;
    this.config = config;
  }

  /**
   * Initialize the embedding model, vector store and RAG application.
   *
   * Safe to call repeatedly and concurrently: the work runs once, and every
   * caller awaits the same in-flight promise. If initialization fails the
   * error is rethrown and a subsequent call will retry.
   *
   * @returns {Promise<void>}
   * @throws Any error raised while creating the embedding model, the vector
   *   database or the RAG application.
   */
  async initialize() {
    if (this.initialized) return;

    if (!this.#initPromise) {
      this.#initPromise = this.#doInitialize().finally(() => {
        // Clear on both success and failure; `initialized` guards the success
        // path, and clearing on failure allows a retry.
        this.#initPromise = null;
      });
    }
    return this.#initPromise;
  }

  /**
   * Perform the actual initialization work for {@link initialize}.
   *
   * @returns {Promise<void>}
   */
  async #doInitialize() {
    try {
      // Make sure the storage directory exists before the database is opened.
      ensureDir(this.config.storage.path);

      this.embeddingModel = new OpenAiEmbeddings({
        apiKey: this.config.api.apiKey,
        configuration: {
          baseURL: this.config.api.baseURL
        },
        dimensions: this.config.embeddings.dimensions,
        batchSize: this.config.embeddings.batchSize,
        model: this.config.embeddings.model || 'BAAI/bge-m3',
        debug: true // enable debug mode
      });
      await this.embeddingModel.init();

      // Vector database stored at <storage path>/<name>.
      const vectorStore = new LibSqlDb({
        path: path.join(this.config.storage.path, this.name),
      });

      // NOTE(review): 'NO_MODEL' presumably disables the LLM so the app is
      // used for embedding/search only; confirm against embedjs docs.
      this.app = await new RAGApplicationBuilder()
        .setModel('NO_MODEL')
        .setEmbeddingModel(this.embeddingModel)
        .setVectorDatabase(vectorStore)
        .build();

      this.initialized = true;
    } catch (error) {
      console.error('Error initializing knowledge base:', error);
      throw error;
    }
  }

  /**
   * Add the content of a URL to the knowledge base.
   *
   * @param {string} url URL to load.
   * @param {boolean} [forceReload=false] Forwarded to the loader as its
   *   force-reload flag.
   * @returns {Promise<*>} Whatever `loadUrl` resolves with.
   */
  async addUrl(url, forceReload = false) {
    await this.ensureInitialized();
    return await loadUrl(this.app, url, forceReload);
  }

  /**
   * Add a file to the knowledge base.
   *
   * @param {string} filePath Path of the file to load.
   * @param {boolean} [forceReload=false] Forwarded to the loader as its
   *   force-reload flag.
   * @returns {Promise<*>} Whatever `loadFile` resolves with.
   */
  async addFile(filePath, forceReload = false) {
    await this.ensureInitialized();
    return await loadFile(this.app, filePath, forceReload);
  }

  /**
   * Add a directory to the knowledge base.
   *
   * @param {string} directoryPath Path of the directory to load.
   * @param {object} [options] Options.
   * @param {boolean} [options.forceReload] Forwarded to the loader; defaults
   *   to false when missing or falsy.
   * @returns {Promise<*>} Whatever `loadDirectory` resolves with.
   */
  async addDirectory(directoryPath, options = {}) {
    await this.ensureInitialized();
    return await loadDirectory(this.app, directoryPath, {
      // `?.` tolerates an explicit `null` options argument.
      forceReload: options?.forceReload || false,
    });
  }

  /**
   * Remove previously loaded content from the knowledge base.
   *
   * Deletion errors are logged and reported in the result object rather than
   * thrown.
   *
   * @param {string} uniqueId Loader ID of the content to remove.
   * @returns {Promise<{success: boolean, uniqueId: string, error?: string}>}
   */
  async remove(uniqueId) {
    await this.ensureInitialized();
    try {
      await this.app.deleteLoader(uniqueId);
      return { success: true, uniqueId };
    } catch (error) {
      console.error(`Error removing content with ID ${uniqueId}:`, error);
      return { success: false, uniqueId, error: error.message };
    }
  }

  /**
   * Search the knowledge base.
   *
   * @param {string} query Query text.
   * @param {boolean} [generateReport=false] When true, also produce an HTML
   *   report via `generateHtmlReport`.
   * @returns {Promise<{results: Array<{text: *, score: *, source: string}>, reportPath: *}>}
   *   The processed hits and the value returned by `generateHtmlReport`
   *   (null when no report was requested).
   * @throws Rethrows any error from the underlying search or report step
   *   after logging it.
   */
  async search(query, generateReport = false) {
    await this.ensureInitialized();
    try {
      const searchResults = await this.app.search(query);

      const processedResults = searchResults.map((result) => {
        let source = result.metadata?.source || 'unknown';

        // For local-path loaders, expose only the file name rather than the
        // full path.
        if (result.metadata?.type === 'LocalPathLoader') {
          source = path.basename(source);
        }

        return {
          text: result.pageContent,
          score: result.score,
          source,
        };
      });

      let reportPath = null;
      if (generateReport) {
        reportPath = generateHtmlReport(processedResults, query, {
          title: `${this.name} - 搜索结果`,
          reportsDir: this.config.reports.path
        });
      }

      return {
        results: processedResults,
        reportPath
      };
    } catch (error) {
      console.error('Error searching:', error);
      throw error;
    }
  }

  /**
   * Make sure the knowledge base has been initialized before it is used.
   *
   * @returns {Promise<void>}
   */
  async ensureInitialized() {
    if (!this.initialized) {
      await this.initialize();
    }
  }

  /**
   * Reset the knowledge base by calling `reset()` on the underlying app.
   *
   * Does nothing when the knowledge base has not been initialized yet
   * (`this.app` is still null).
   *
   * @returns {Promise<void>}
   */
  async reset() {
    if (this.app) {
      await this.app.reset();
      console.log(`Knowledge base "${this.name}" has been reset.`);
    }
  }

  /**
   * Add raw text to the knowledge base.
   *
   * @param {string} text Text content.
   * @param {boolean} [forceReload=false] Forwarded to the loader as its
   *   force-reload flag.
   * @param {object} [metadata={}] Optional metadata forwarded to the loader.
   * @returns {Promise<*>} Whatever `loadText` resolves with.
   */
  async addText(text, forceReload = false, metadata = {}) {
    await this.ensureInitialized();
    return await loadText(this.app, text, forceReload, metadata);
  }
}

module.exports = KnowledgeBase;