UNPKG

ai-knowledge

Version:

ai-knowledge

99 lines (93 loc) 3.34 kB
const path = require('path');
const fs = require('fs');
const { TextLoader, LocalPathLoader, JsonLoader } = require('@llm-tools/embedjs');
const { WebLoader } = require('@llm-tools/embedjs-loader-web');
const config = require('../config');
const loaderQueue = require('./queueManager');
const { v4: uuidv4 } = require('uuid');

// Supported file types, grouped by the loader that handles them.
const COMMON_EXTS = ['.pdf', '.csv', '.docx', '.pptx', '.xlsx', '.md'];
const HTML_EXTS = ['.html', '.htm'];
const JSON_EXTS = ['.json'];

/**
 * Returns the chunking options passed to every chunk-aware loader.
 * Read from `config.chunking` at call time, and only by the branches that
 * actually need them.
 *
 * @returns {{ chunkSize: *, chunkOverlap: * }} Values of config.chunking.
 */
function chunkingOptions() {
  return {
    chunkSize: config.chunking.chunkSize,
    chunkOverlap: config.chunking.chunkOverlap,
  };
}

/**
 * Picks and constructs the loader for a file based on its extension.
 *
 * - Extensions in COMMON_EXTS are handed to LocalPathLoader by path.
 * - HTML files are read as UTF-8 and handed to WebLoader as content.
 * - JSON files are parsed and handed to JsonLoader; if parsing or loader
 *   construction fails, the same content is loaded as plain text instead.
 * - Everything else is read as UTF-8 text and handed to TextLoader.
 *
 * @param {string} filePath - Path of the file to load.
 * @returns {object} A loader instance.
 * @throws {Error} If the file cannot be read or a loader constructor throws.
 */
function createLoader(filePath) {
  const ext = path.extname(filePath).toLowerCase();

  if (COMMON_EXTS.includes(ext)) {
    return new LocalPathLoader({
      path: filePath,
      ...chunkingOptions(),
      pdfOptions: { verbose: false, disableLogging: true },
    });
  }

  // Read exactly once: the JSON fallback below must see the same content the
  // parse attempt saw, and a read failure must surface as a read failure
  // rather than being reported as a JSON parse problem.
  const content = fs.readFileSync(filePath, 'utf-8');

  if (HTML_EXTS.includes(ext)) {
    return new WebLoader({ urlOrContent: content, ...chunkingOptions() });
  }

  if (JSON_EXTS.includes(ext)) {
    try {
      return new JsonLoader({ object: JSON.parse(content) });
    } catch (error) {
      // Best-effort: malformed JSON is still indexed as plain text. The
      // underlying reason is logged so the bad file can be diagnosed.
      console.warn(
        `JSON解析失败 ${filePath}, 使用文本处理`,
        error instanceof Error ? error.message : error
      );
    }
  }

  // Default (and JSON fallback): treat the file as plain text.
  return new TextLoader({ text: content, ...chunkingOptions() });
}

/**
 * Queues a file for loading into the knowledge base.
 *
 * The file is not embedded here: a loader is built for it and a task is
 * handed to `loaderQueue`. Errors are caught and reported through the
 * returned object, so the returned promise does not reject.
 *
 * @param {object} ragApplication - Passed through unchanged to the queue task.
 * @param {string} filePath - Path of the file to load.
 * @param {boolean} [forceReload=true] - Passed through unchanged to the queue task.
 * @returns {Promise<object>} On success
 *   `{ success: true, source, type: 'file', status: 'queued', taskId }`;
 *   on failure `{ success: false, source, type: 'file', error }` where
 *   `error` is the error message.
 */
async function loadFile(ragApplication, filePath, forceReload = true) {
  try {
    if (!fs.existsSync(filePath)) {
      throw new Error(`文件不存在: ${filePath}`);
    }

    const taskId = uuidv4();
    const loader = createLoader(filePath);

    // NOTE(review): the return value of loaderQueue.add is ignored, as in the
    // original code. If it returns a promise, a rejection would not be
    // handled here - confirm against queueManager.
    loaderQueue.add({
      taskId,
      ragApplication,
      loader,
      source: filePath,
      type: 'file',
      forceReload,
    });

    return {
      success: true,
      source: filePath,
      type: 'file',
      status: 'queued',
      taskId,
    };
  } catch (error) {
    console.error(`加载文件失败 ${filePath}:`, error);
    // Include source/type so callers processing several files can tell which
    // one failed, mirroring the shape of the success result.
    return {
      success: false,
      source: filePath,
      type: 'file',
      error: error.message,
    };
  }
}

module.exports = { loadFile };