/*
 * ai-knowledge
 * Version:
 * ai-knowledge
 * 200 lines (195 loc) • 7.11 kB
 * JavaScript
 */
const { RAGApplicationBuilder } = require('@llm-tools/embedjs');
const { LibSqlDb } = require('@llm-tools/embedjs-libsql');
// const { BGEM3Embeddings } = require('./embeddings');
const { OpenAiEmbeddings } = require('@llm-tools/embedjs-openai')
const { loadUrl, loadFile, loadDirectory } = require('./loaders');
const { generateHtmlReport } = require('./reporters');
const { ensureDir } = require('./utils');
const path = require('path');
const config = require('./config');
const { loadText } = require('./loaders/textLoader');
const fs = require('fs');
/**
 * A named knowledge base backed by an embedJs RAG application with a LibSQL
 * vector store and an OpenAI-compatible embedding model.
 *
 * Instances are normally obtained through `KnowledgeBase.getInstance(name)`,
 * which caches one initialized instance per name.
 */
class KnowledgeBase {
  // Cache of fully initialized instances, keyed by knowledge base name.
  static instances = new Map();

  // In-flight initializations keyed by name, so that concurrent
  // getInstance() calls for the same name share one instance instead of
  // each building (and then overwriting) their own.
  static pendingInstances = new Map();

  /**
   * Check whether a knowledge base exists on disk.
   * @param {string} name Knowledge base name
   * @returns {boolean} true when the database path exists
   */
  static exists(name) {
    const dbPath = path.join(config.storage.path, name);
    return fs.existsSync(dbPath);
  }

  /**
   * Get the (singleton) initialized instance for a knowledge base name.
   * Concurrent calls for the same name resolve to the same instance; a
   * failed initialization is not cached, so a later call retries.
   * @param {string} name Knowledge base name
   * @returns {Promise<KnowledgeBase>} the initialized instance
   */
  static async getInstance(name) {
    const ready = this.instances.get(name);
    if (ready) return ready;

    let pending = this.pendingInstances.get(name);
    if (!pending) {
      // Construct outside the async closure: a synchronous constructor
      // failure then rejects this call without leaving a stale cache entry.
      const instance = new KnowledgeBase(name);
      pending = (async () => {
        try {
          await instance.initialize();
          this.instances.set(name, instance);
          return instance;
        } finally {
          // Runs after the `set` below because the await above always yields.
          this.pendingInstances.delete(name);
        }
      })();
      this.pendingInstances.set(name, pending);
    }
    return pending;
  }

  /**
   * Create a knowledge base instance (not yet initialized).
   * @param {string} name Knowledge base name
   */
  constructor(name) {
    this.name = name;
    this.app = null;
    this.storagePath = path.join(config.storage.path, this.name);
    this.initialized = false;
    // Promise of the initialization currently in progress, if any.
    this.initializing = null;
    this.config = config;
  }

  /**
   * Initialize the knowledge base: build the embedding model, the vector
   * database and the RAG application. Safe to call repeatedly and
   * concurrently; the work is done once. After a failure the next call
   * starts a fresh attempt.
   * @returns {Promise<void>}
   * @throws rethrows any error raised while building the application
   */
  async initialize() {
    if (this.initialized) return;

    if (this.initializing) {
      // Another caller already started initialization; share its outcome.
      await this.initializing;
      return;
    }

    const build = async () => {
      // Make sure the storage directory exists. Awaited so that an
      // asynchronous ensureDir is also handled; harmless if it is sync.
      await ensureDir(this.config.storage.path);

      this.embeddingModel = new OpenAiEmbeddings({
        apiKey: this.config.api.apiKey,
        configuration: { baseURL: this.config.api.baseURL },
        dimensions: this.config.embeddings.dimensions,
        batchSize: this.config.embeddings.batchSize,
        model: this.config.embeddings.model || 'BAAI/bge-m3',
        debug: true, // enable debug mode
      });
      await this.embeddingModel.init();

      // Vector database stored under <storage path>/<name>.
      const vectorStore = new LibSqlDb({
        path: this.storagePath,
      });

      // Retrieval-only application: 'NO_MODEL' is passed as the model.
      this.app = await new RAGApplicationBuilder()
        .setModel('NO_MODEL')
        .setEmbeddingModel(this.embeddingModel)
        .setVectorDatabase(vectorStore)
        .build();
    };

    const attempt = build();
    this.initializing = attempt;
    try {
      await attempt;
      this.initialized = true;
    } catch (error) {
      console.error('Error initializing knowledge base:', error);
      throw error;
    } finally {
      // Clear the marker so a failed attempt can be retried.
      this.initializing = null;
    }
  }

  /**
   * Add a URL to the knowledge base.
   * @param {string} url URL to load
   * @param {boolean} forceReload Whether to force a reload
   * @returns {Promise<*>} whatever the URL loader returns
   */
  async addUrl(url, forceReload = false) {
    await this.ensureInitialized();
    return await loadUrl(this.app, url, forceReload);
  }

  /**
   * Add a file to the knowledge base.
   * @param {string} filePath Path of the file
   * @param {boolean} forceReload Whether to force a reload
   * @returns {Promise<*>} whatever the file loader returns
   */
  async addFile(filePath, forceReload = false) {
    await this.ensureInitialized();
    return await loadFile(this.app, filePath, forceReload);
  }

  /**
   * Add a directory to the knowledge base.
   * @param {string} directoryPath Path of the directory
   * @param {object} options Options; only `forceReload` is forwarded
   * @returns {Promise<*>} whatever the directory loader returns
   */
  async addDirectory(directoryPath, options = {}) {
    await this.ensureInitialized();
    return await loadDirectory(this.app, directoryPath, {
      forceReload: options.forceReload || false,
    });
  }

  /**
   * Remove previously loaded content from the knowledge base.
   * Failures are logged and reported in the result instead of thrown.
   * @param {string} uniqueId Loader ID
   * @returns {Promise<{success: boolean, uniqueId: string, error?: string}>}
   */
  async remove(uniqueId) {
    await this.ensureInitialized();
    try {
      await this.app.deleteLoader(uniqueId);
      return { success: true, uniqueId };
    } catch (error) {
      console.error(`Error removing content with ID ${uniqueId}:`, error);
      return { success: false, uniqueId, error: error.message };
    }
  }

  /**
   * Search the knowledge base.
   * @param {string} query Query text
   * @param {boolean} generateReport Whether to generate an HTML report
   * @returns {Promise<{results: Array<{text: *, score: *, source: string}>, reportPath: *}>}
   *   `reportPath` is null unless a report was requested
   * @throws rethrows any error raised by the search or report generation
   */
  async search(query, generateReport = false) {
    await this.ensureInitialized();
    try {
      const searchResults = await this.app.search(query);
      const processedResults = searchResults.map((result) => {
        const { metadata } = result;
        const rawSource = metadata?.source || 'unknown';
        // Only sources of type LocalPathLoader are reduced to a file name.
        const source = metadata?.type === 'LocalPathLoader'
          ? path.basename(rawSource)
          : rawSource;
        return {
          text: result.pageContent,
          score: result.score,
          source,
        };
      });

      let reportPath = null;
      if (generateReport) {
        // Awaited so that an asynchronous report generator yields a path
        // rather than a Promise; harmless if it is synchronous.
        reportPath = await generateHtmlReport(processedResults, query, {
          title: `${this.name} - 搜索结果`,
          reportsDir: this.config.reports.path,
        });
      }

      return {
        results: processedResults,
        reportPath,
      };
    } catch (error) {
      console.error('Error searching:', error);
      throw error;
    }
  }

  /**
   * Make sure the knowledge base has been initialized.
   * @returns {Promise<void>}
   */
  async ensureInitialized() {
    if (!this.initialized) {
      await this.initialize();
    }
  }

  /**
   * Reset the knowledge base. Does nothing when the application has not
   * been built yet.
   * @returns {Promise<void>}
   */
  async reset() {
    if (this.app) {
      await this.app.reset();
      console.log(`Knowledge base "${this.name}" has been reset.`);
    }
  }

  /**
   * Add raw text to the knowledge base.
   * @param {string} text Text content
   * @param {boolean} forceReload Whether to force a reload
   * @param {object} metadata Optional metadata
   * @returns {Promise<*>} whatever the text loader returns
   */
  async addText(text, forceReload = false, metadata = {}) {
    await this.ensureInitialized();
    return await loadText(this.app, text, forceReload, metadata);
  }
}
// CommonJS export: the module's sole export is the KnowledgeBase class.
module.exports = KnowledgeBase;