// ai-knowledge
// Version:
// ai-knowledge
// 99 lines (93 loc) • 3.34 kB
// JavaScript
const path = require('path');
const fs = require('fs');
const { TextLoader, LocalPathLoader, JsonLoader } = require('@llm-tools/embedjs');
const { WebLoader } = require('@llm-tools/embedjs-loader-web');
const config = require('../config');
const loaderQueue = require('./queueManager');
const { v4: uuidv4 } = require('uuid');
// Supported file types, grouped by the loader branch that matches them in loadFile.

// Document formats whose path is handed to LocalPathLoader.
const COMMON_EXTS = [
  '.pdf',
  '.csv',
  '.docx',
  '.pptx',
  '.xlsx',
  '.md'
];

// HTML files: their content is read and passed to WebLoader.
const HTML_EXTS = [
  '.html',
  '.htm'
];

// JSON files: parsed and passed to JsonLoader.
const JSON_EXTS = [
  '.json'
];
/**
 * Build a TextLoader for raw text using the chunking settings from config.
 *
 * @param {string} text - Text content to be chunked.
 * @returns {TextLoader} The configured loader.
 */
function createTextLoader(text) {
  return new TextLoader({
    text,
    chunkSize: config.chunking.chunkSize,
    chunkOverlap: config.chunking.chunkOverlap
  });
}

/**
 * Construct the loader that matches the extension of `filePath`.
 *
 * Extensions in COMMON_EXTS get a LocalPathLoader that receives the path;
 * every other type is read here as UTF-8 and the content is passed to the loader.
 *
 * @param {string} filePath - Path of the file to load.
 * @returns {object} A LocalPathLoader, WebLoader, JsonLoader or TextLoader.
 * @throws {Error} If the file cannot be read.
 */
function createLoaderForFile(filePath) {
  const ext = path.extname(filePath).toLowerCase();

  // Common document types: the loader is given the path, not the content.
  if (COMMON_EXTS.includes(ext)) {
    return new LocalPathLoader({
      path: filePath,
      chunkSize: config.chunking.chunkSize,
      chunkOverlap: config.chunking.chunkOverlap,
      pdfOptions: {
        verbose: false,
        disableLogging: true
      }
    });
  }

  // Read once, outside the JSON try block, so that an I/O failure is reported
  // as a load failure instead of being mislabelled as a JSON parse error.
  const content = fs.readFileSync(filePath, 'utf-8');

  // HTML files: pass the markup itself to the web loader.
  if (HTML_EXTS.includes(ext)) {
    return new WebLoader({
      urlOrContent: content,
      chunkSize: config.chunking.chunkSize,
      chunkOverlap: config.chunking.chunkOverlap
    });
  }

  // JSON files: fall back to plain-text loading when the JSON loader cannot be built.
  if (JSON_EXTS.includes(ext)) {
    try {
      return new JsonLoader({ object: JSON.parse(content) });
    } catch (error) {
      console.warn(`JSON解析失败 ${filePath}, 使用文本处理`);
      return createTextLoader(content);
    }
  }

  // Any other extension is treated as plain text.
  return createTextLoader(content);
}

/**
 * Queue a file for loading into the knowledge base.
 *
 * The function never rejects: every error raised while preparing the task is
 * logged and reported through the returned object.
 *
 * @param {object} ragApplication - Passed through unchanged to the queued task.
 * @param {string} filePath - Path of the file to load.
 * @param {boolean} [forceReload=true] - Passed through unchanged to the queued task.
 * @returns {Promise<object>} On success
 *   `{ success: true, source, type: 'file', status: 'queued', taskId }`;
 *   on failure `{ success: false, error }` where `error` is the error message.
 */
async function loadFile(ragApplication, filePath, forceReload = true) {
  try {
    if (!fs.existsSync(filePath)) {
      throw new Error(`文件不存在: ${filePath}`);
    }

    const taskId = uuidv4();
    const loader = createLoaderForFile(filePath);

    // Hand the task to the queue; the returned taskId identifies it.
    loaderQueue.add({
      taskId,
      ragApplication,
      loader,
      source: filePath,
      type: 'file',
      forceReload
    });

    return {
      success: true,
      source: filePath,
      type: 'file',
      status: 'queued',
      taskId
    };
  } catch (error) {
    console.error(`加载文件失败 ${filePath}:`, error);
    return {
      success: false,
      error: error.message
    };
  }
}
// Public API of this module: loadFile is the only export.
module.exports = { loadFile };