UNPKG

@restnfeel/agentc-starter-kit

Version:

한국어 기업용 CMS 모듈 - Task Master AI와 함께 빠르게 웹사이트를 구현할 수 있는 재사용 가능한 컴포넌트 시스템

492 lines (489 loc) 19.3 kB
import { DocumentProcessingPipeline } from './processors/pipeline.js';
import { VectorStoreFactory } from './vectorstore/index.js';
import { EmbeddingFactory } from './embeddings/index.js';
import { RSSFeedManager, NaverBlogRSSLoader, RSSLoader } from './loaders/rss.js';
import { SupabaseStorageManager } from './storage/supabase.js';
import { HybridRetriever } from './retrieval/hybrid-retriever.js';

// Hybrid-search defaults. Used both when the retriever is constructed and as
// fallbacks when searchWithOptions() receives only a partial override.
const DEFAULT_VECTOR_WEIGHT = 0.7;
const DEFAULT_KEYWORD_WEIGHT = 0.3;
const DEFAULT_MIN_SCORE = 0.1;

/**
 * Builds the wrapped error thrown by the engine's public methods.
 * The message format (`<prefix>: <error>`) is what callers already see; the
 * original error is additionally attached as `cause` so its stack is not lost.
 * Runtimes that do not support the `cause` option ignore the second argument.
 */
function wrapError(prefix, error) {
    return new Error(`${prefix}: ${error}`, { cause: error });
}

/** Returns `value` unless it is null/undefined, in which case `fallback`. */
function valueOr(value, fallback) {
    return value == null ? fallback : value;
}

/** Returns `vectorStore.getCount()` when the store exposes that method, else 0. */
function readChunkCount(vectorStore) {
    const canCount = "getCount" in vectorStore && typeof vectorStore.getCount === "function";
    return canCount ? vectorStore.getCount() : 0;
}

/**
 * Registers every chunk of every given document with the hybrid retriever.
 * Does nothing when there is no retriever; documents without `chunks` are skipped.
 */
function indexDocumentChunks(retriever, documents) {
    if (!retriever) {
        return;
    }
    for (const document of documents) {
        if (!document.chunks) {
            continue;
        }
        for (const chunk of document.chunks) {
            retriever.updateDocumentIndex(chunk.id, chunk.content);
        }
    }
}

/**
 * Runs loaded feed items through the processing pipeline (one at a time, in
 * order), merges each item's feed metadata over the pipeline's metadata, adds
 * the results to the vector store and indexes their chunks.
 * Returns the processed documents.
 */
async function ingestFeedDocuments(engine, feedDocuments) {
    const processedDocs = [];
    for (const doc of feedDocuments) {
        // The pipeline is called with a buffer, so wrap the item's text content.
        const buffer = Buffer.from(doc.content, "utf-8");
        const processedDoc = await engine.processingPipeline.processDocument(doc.source || `rss_${doc.id}`, buffer);
        // Feed metadata wins over pipeline metadata on key collisions.
        processedDoc.metadata = { ...processedDoc.metadata, ...doc.metadata };
        processedDocs.push(processedDoc);
    }
    await engine.vectorStore.addDocuments(processedDocs);
    indexDocumentChunks(engine.hybridRetriever, processedDocs);
    return processedDocs;
}

/**
 * Facade that wires together the embedding model, vector store, document
 * processing pipeline, optional Supabase storage, hybrid retriever and RSS
 * feed manager, and exposes ingestion and search operations over them.
 */
class RAGEngine {
    /**
     * @param {object} config - Engine configuration. This class reads
     *   `llmConfig.apiKey`, `embeddingModel`, `vectorStorePath`, `chunkSize`,
     *   `chunkOverlap` and, optionally, `supabaseConfig.{url, anonKey, bucket}`.
     */
    constructor(config) {
        this.config = config;
        this.rssManager = new RSSFeedManager();
        this.initializeComponents();
    }

    /**
     * Creates all synchronous components. Called from the constructor.
     * Storage is only created when both a Supabase URL and anon key are present.
     */
    initializeComponents() {
        // Embedding model (OpenAI is the only type wired up here).
        this.embeddingModel = EmbeddingFactory.create({
            type: "openai",
            apiKey: this.config.llmConfig.apiKey,
            modelName: this.config.embeddingModel,
        });

        // Vector store.
        this.vectorStore = VectorStoreFactory.create("memory", this.embeddingModel, {
            storePath: this.config.vectorStorePath,
        });

        // Processing pipeline.
        this.processingPipeline = new DocumentProcessingPipeline({
            textSplitter: {
                chunkSize: this.config.chunkSize,
                chunkOverlap: this.config.chunkOverlap,
            },
            enableMetadataExtraction: true,
            enableTextCleaning: true,
        });

        // Supabase storage is optional: tolerate a config with no
        // `supabaseConfig` section instead of throwing on property access.
        const supabaseConfig = this.config.supabaseConfig;
        if (supabaseConfig && supabaseConfig.url && supabaseConfig.anonKey) {
            this.storageManager = new SupabaseStorageManager({
                url: supabaseConfig.url,
                anonKey: supabaseConfig.anonKey,
                bucket: supabaseConfig.bucket,
            });
        }

        // Hybrid retriever.
        this.hybridRetriever = new HybridRetriever(this.vectorStore, this.embeddingModel, {
            vectorWeight: DEFAULT_VECTOR_WEIGHT,
            keywordWeight: DEFAULT_KEYWORD_WEIGHT,
            maxResults: 10,
            minScore: DEFAULT_MIN_SCORE,
            enableReranking: true,
        });
    }

    /**
     * Performs asynchronous setup: calls the vector store's `initialize()` if it
     * has one, and initializes the storage bucket when storage is configured.
     */
    async initialize() {
        if ("initialize" in this.vectorStore && typeof this.vectorStore.initialize === "function") {
            await this.vectorStore.initialize();
        }
        if (this.storageManager) {
            await this.storageManager.initializeBucket();
        }
    }

    /**
     * Processes one document, stores it and indexes its chunks.
     * @param {*} filePath - Forwarded to the pipeline's `processDocument`.
     * @param {*} content - Forwarded to the pipeline's `processDocument`.
     * @returns {Promise<*>} The processed document's `id`.
     * @throws {Error} "Failed to add document: ..." wrapping any failure.
     */
    async addDocument(filePath, content) {
        try {
            const document = await this.processingPipeline.processDocument(filePath, content);
            await this.vectorStore.addDocuments([document]);
            indexDocumentChunks(this.hybridRetriever, [document]);
            return document.id;
        }
        catch (error) {
            throw wrapError("Failed to add document", error);
        }
    }

    /**
     * Processes a batch of files, stores them and indexes their chunks.
     * @param {*} files - Forwarded to the pipeline's `processBatch`.
     * @returns {Promise<Array>} The ids of the processed documents.
     * @throws {Error} "Failed to add documents: ..." wrapping any failure.
     */
    async addDocuments(files) {
        try {
            const documents = await this.processingPipeline.processBatch(files);
            await this.vectorStore.addDocuments(documents);
            indexDocumentChunks(this.hybridRetriever, documents);
            return documents.map((doc) => doc.id);
        }
        catch (error) {
            throw wrapError("Failed to add documents", error);
        }
    }

    /**
     * Searches with the hybrid retriever when one exists, otherwise with the
     * vector store directly.
     * @param {*} query - Query forwarded to the underlying search.
     * @param {number} [k=5] - Forwarded as the second search argument.
     * @throws {Error} "Search failed: ..." wrapping any failure.
     */
    async search(query, k = 5) {
        try {
            if (this.hybridRetriever) {
                return await this.hybridRetriever.search(query, k);
            }
            return await this.vectorStore.search(query, k);
        }
        catch (error) {
            throw wrapError("Search failed", error);
        }
    }

    /**
     * Searches the vector store only.
     * @param {*} query - Query forwarded to `vectorStore.search`.
     * @param {number} [k=5] - Forwarded as the second search argument.
     * @throws {Error} "Vector search failed: ..." wrapping any failure.
     */
    async vectorSearch(query, k = 5) {
        try {
            return await this.vectorStore.search(query, k);
        }
        catch (error) {
            throw wrapError("Vector search failed", error);
        }
    }

    /**
     * Searches the hybrid retriever only.
     * @param {*} query - Query forwarded to `hybridRetriever.search`.
     * @param {number} [k=5] - Forwarded as the second search argument.
     * @throws {Error} If there is no hybrid retriever, or
     *   "Hybrid search failed: ..." wrapping any search failure.
     */
    async hybridSearch(query, k = 5) {
        if (!this.hybridRetriever) {
            throw new Error("Hybrid retriever is not initialized");
        }
        try {
            return await this.hybridRetriever.search(query, k);
        }
        catch (error) {
            throw wrapError("Hybrid search failed", error);
        }
    }

    /**
     * Deletes a document from the vector store.
     * NOTE(review): the hybrid retriever's index is not touched here; no
     * per-document removal method of the retriever is visible in this file.
     * @param {*} documentId - Forwarded to `vectorStore.delete`.
     * @throws {Error} "Failed to delete document: ..." wrapping any failure.
     */
    async deleteDocument(documentId) {
        try {
            await this.vectorStore.delete(documentId);
        }
        catch (error) {
            throw wrapError("Failed to delete document", error);
        }
    }

    /**
     * Re-processes a document and replaces the stored version under the
     * existing id. The re-processed chunks are also pushed to the hybrid
     * retriever so keyword search sees the same content as the vector store,
     * matching what the add* methods do.
     * @param {*} documentId - Id the re-processed document is stored under.
     * @param {*} filePath - Forwarded to the pipeline's `processDocument`.
     * @param {*} content - Forwarded to the pipeline's `processDocument`.
     * @throws {Error} "Failed to update document: ..." wrapping any failure.
     */
    async updateDocument(documentId, filePath, content) {
        try {
            const document = await this.processingPipeline.processDocument(filePath, content);
            // Keep the caller's id rather than the freshly generated one.
            document.id = documentId;
            await this.vectorStore.update(document);
            indexDocumentChunks(this.hybridRetriever, [document]);
        }
        catch (error) {
            throw wrapError("Failed to update document", error);
        }
    }

    /** Returns whatever the processing pipeline reports as supported extensions. */
    getSupportedExtensions() {
        return this.processingPipeline.getSupportedExtensions();
    }

    /**
     * Returns basic engine statistics.
     * `documentCount` is always 0 and `vectorStoreType` is always "memory";
     * both are placeholders (see TODOs).
     */
    async getStats() {
        // Awaited so that a store whose getCount() is asynchronous still yields
        // a number; awaiting a plain number is a no-op.
        const chunkCount = await readChunkCount(this.vectorStore);
        return {
            documentCount: 0, // TODO: Track document count separately
            chunkCount,
            embeddingModel: this.config.embeddingModel,
            vectorStoreType: "memory", // TODO: Make this dynamic
        };
    }

    /**
     * Clears the vector store if it exposes `clear()`.
     * NOTE(review): the hybrid retriever's index is left as is; call
     * clearSearchIndex() to clear that as well.
     */
    async clear() {
        if ("clear" in this.vectorStore && typeof this.vectorStore.clear === "function") {
            await this.vectorStore.clear();
        }
    }

    // Supabase Storage specific methods

    /**
     * Uploads a file to storage without adding it to the search system.
     * @returns The storage manager's upload result.
     * @throws {Error} If storage is not configured.
     */
    async uploadDocumentToStorage(file, fileName, metadata) {
        if (!this.storageManager) {
            throw new Error("Supabase Storage is not configured");
        }
        return this.storageManager.uploadDocument(file, fileName, metadata);
    }

    /**
     * Downloads a document from storage and adds it via addDocument().
     * @param {*} filePath - Storage path; also used as the document's file path.
     * @returns {Promise<*>} The new document's id.
     * @throws {Error} If storage is not configured, or
     *   "Failed to add document from storage: ..." wrapping any failure.
     */
    async addDocumentFromStorage(filePath) {
        if (!this.storageManager) {
            throw new Error("Supabase Storage is not configured");
        }
        try {
            const content = await this.storageManager.downloadDocument(filePath);
            // Awaited here so a failure in addDocument is wrapped by this
            // method's catch, like the download failure is.
            return await this.addDocument(filePath, content);
        }
        catch (error) {
            throw wrapError("Failed to add document from storage", error);
        }
    }

    /**
     * Uploads a file to storage, then adds it to the search system under the
     * uploaded path.
     * @param {*} file - A `File` (converted to a Buffer) or a value that is
     *   passed to addDocument() unchanged.
     * @returns {Promise<{documentId: *, uploadResult: *}>}
     * @throws {Error} If storage is not configured, or
     *   "Failed to upload and add document: ..." wrapping any failure.
     */
    async uploadAndAddDocument(file, fileName, metadata) {
        if (!this.storageManager) {
            throw new Error("Supabase Storage is not configured");
        }
        try {
            const uploadResult = await this.storageManager.uploadDocument(file, fileName, metadata);
            // `File` is not a global on every runtime; referencing it
            // unguarded would throw a ReferenceError even for Buffer input.
            const isFileObject = typeof File !== "undefined" && file instanceof File;
            const buffer = isFileObject ? Buffer.from(await file.arrayBuffer()) : file;
            const documentId = await this.addDocument(uploadResult.path, buffer);
            return { documentId, uploadResult };
        }
        catch (error) {
            throw wrapError("Failed to upload and add document", error);
        }
    }

    /**
     * Deletes a document from storage, then deletes the vector-store entry
     * whose id getDocumentIdFromPath() derives from the same path.
     * @throws {Error} If storage is not configured, or
     *   "Failed to delete document from storage: ..." wrapping any failure.
     */
    async deleteDocumentFromStorage(documentPath) {
        if (!this.storageManager) {
            throw new Error("Supabase Storage is not configured");
        }
        try {
            await this.storageManager.deleteDocument(documentPath);
            const documentId = await this.getDocumentIdFromPath(documentPath);
            // An empty derived id (e.g. from an empty path) is skipped.
            if (documentId) {
                await this.deleteDocument(documentId);
            }
        }
        catch (error) {
            throw wrapError("Failed to delete document from storage", error);
        }
    }

    /**
     * Lists documents held in storage.
     * @throws {Error} If storage is not configured.
     */
    async listStorageDocuments() {
        if (!this.storageManager) {
            throw new Error("Supabase Storage is not configured");
        }
        return this.storageManager.listDocuments();
    }

    /**
     * Returns the storage manager's statistics.
     * @throws {Error} If storage is not configured.
     */
    async getStorageStats() {
        if (!this.storageManager) {
            throw new Error("Supabase Storage is not configured");
        }
        return this.storageManager.getStorageStats();
    }

    /**
     * Derives an id from a storage path: the path's base64 encoding with every
     * non-alphanumeric character removed.
     * This is a simplified implementation; no mapping between storage paths
     * and document ids is maintained.
     * NOTE(review): nothing in this file shows that ids produced by the
     * processing pipeline follow this scheme - confirm before relying on it.
     */
    async getDocumentIdFromPath(path) {
        return Buffer.from(path)
            .toString("base64")
            .replace(/[^a-zA-Z0-9]/g, "");
    }

    /** True when a storage manager was created from the configuration. */
    isStorageConfigured() {
        return !!this.storageManager;
    }

    // Search configuration and statistics methods

    /**
     * Forwards a configuration object to the hybrid retriever.
     * @throws {Error} If there is no hybrid retriever.
     */
    updateHybridSearchConfig(config) {
        if (!this.hybridRetriever) {
            throw new Error("Hybrid retriever is not initialized");
        }
        this.hybridRetriever.updateConfig(config);
    }

    /**
     * Returns search statistics: the vector store's chunk count and embedding
     * model name, plus the hybrid retriever's own stats when one exists.
     * Synchronous: `chunkCount` is whatever `getCount()` returns, un-awaited.
     */
    getSearchStats() {
        const stats = {
            vectorStore: {
                chunkCount: readChunkCount(this.vectorStore),
                embeddingModel: this.config.embeddingModel,
            },
        };
        if (this.hybridRetriever) {
            stats.hybridRetriever = this.hybridRetriever.getSearchStats();
        }
        return stats;
    }

    /** Clears the hybrid retriever's index, if there is a retriever. */
    clearSearchIndex() {
        if (this.hybridRetriever) {
            this.hybridRetriever.clearIndex();
        }
    }

    /**
     * Placeholder: currently only logs the requested synonyms.
     * Registering them would require HybridRetriever to expose its query
     * processor.
     * @param {string} word
     * @param {string[]} synonyms
     */
    addSearchSynonyms(word, synonyms) {
        if (this.hybridRetriever) {
            console.log(`Adding synonyms for "${word}": ${synonyms.join(", ")}`);
        }
    }

    /**
     * Searches with per-call options.
     * When `vectorWeight` or `keywordWeight` is supplied, the hybrid
     * retriever's configuration is updated first (and stays updated); any of
     * the two weights or `minScore` that is omitted falls back to its default
     * (0.7 / 0.3 / 0.1). An explicit `0` is a valid value and is honoured.
     * @param {*} query - Query forwarded to the chosen search method.
     * @param {object} [options]
     * @param {string} [options.method="hybrid"] - "vector", "hybrid", or
     *   anything else to use search().
     * @param {number} [options.k=5]
     * @param {number} [options.vectorWeight]
     * @param {number} [options.keywordWeight]
     * @param {number} [options.minScore]
     */
    async searchWithOptions(query, options = {}) {
        const { method = "hybrid", k = 5 } = options;

        // Presence is tested with a null check, not truthiness, so that a
        // weight of 0 (e.g. keyword-only search) still counts as an override.
        const hasWeightOverride = options.vectorWeight != null || options.keywordWeight != null;
        if (this.hybridRetriever && hasWeightOverride) {
            this.hybridRetriever.updateConfig({
                vectorWeight: valueOr(options.vectorWeight, DEFAULT_VECTOR_WEIGHT),
                keywordWeight: valueOr(options.keywordWeight, DEFAULT_KEYWORD_WEIGHT),
                minScore: valueOr(options.minScore, DEFAULT_MIN_SCORE),
            });
        }

        switch (method) {
            case "vector":
                return this.vectorSearch(query, k);
            case "hybrid":
                return this.hybridSearch(query, k);
            default:
                return this.search(query, k);
        }
    }

    // RSS-related methods

    /**
     * Registers a Naver blog feed and ingests its current items.
     * NOTE(review): the feed is registered before loading, so it remains
     * registered even if this method throws.
     * @param {*} blogId - Forwarded to NaverBlogRSSLoader.
     * @param {string} [feedName] - Defaults to `naver_blog_<blogId>`.
     * @param {*} [config] - Forwarded to NaverBlogRSSLoader.
     * @returns {Promise<{documentIds: Array, feedName: string, itemCount: number}>}
     * @throws {Error} "Failed to add Naver blog RSS: ..." wrapping any
     *   failure, including the feed yielding no documents.
     */
    async addNaverBlogRSS(blogId, feedName, config) {
        try {
            const loader = new NaverBlogRSSLoader(blogId, config);
            const name = feedName || `naver_blog_${blogId}`;
            this.rssManager.addFeed(name, loader);

            const documents = await loader.loadBlog();
            if (documents.length === 0) {
                throw new Error(`No documents found in Naver blog RSS: ${blogId}`);
            }

            const processedDocs = await ingestFeedDocuments(this, documents);
            return {
                documentIds: processedDocs.map((doc) => doc.id),
                feedName: name,
                itemCount: documents.length,
            };
        }
        catch (error) {
            throw wrapError("Failed to add Naver blog RSS", error);
        }
    }

    /**
     * Registers a generic RSS feed and ingests its current items.
     * NOTE(review): the feed is registered before loading, so it remains
     * registered even if this method throws.
     * @param {*} url - Forwarded to the loader's `loadFromURL`.
     * @param {string} [feedName] - Defaults to `rss_feed_<timestamp>`.
     * @param {*} [config] - Forwarded to RSSLoader.
     * @returns {Promise<{documentIds: Array, feedName: string, itemCount: number}>}
     * @throws {Error} "Failed to add RSS feed: ..." wrapping any failure,
     *   including the feed yielding no documents.
     */
    async addRSSFeed(url, feedName, config) {
        try {
            const loader = new RSSLoader(config);
            const name = feedName || `rss_feed_${Date.now()}`;
            this.rssManager.addFeed(name, loader);

            const documents = await loader.loadFromURL(url);
            if (documents.length === 0) {
                throw new Error(`No documents found in RSS feed: ${url}`);
            }

            const processedDocs = await ingestFeedDocuments(this, documents);
            return {
                documentIds: processedDocs.map((doc) => doc.id),
                feedName: name,
                itemCount: documents.length,
            };
        }
        catch (error) {
            throw wrapError("Failed to add RSS feed", error);
        }
    }

    /**
     * Reloads a registered feed and ingests whatever it currently returns.
     * Only Naver blog feeds can be refreshed; the URL of a generic feed is not
     * stored, so those throw. Items are added again, not de-duplicated, so
     * repeated refreshes might create duplicates in the vector store.
     * @param {string} feedName
     * @returns {Promise<{documentIds: Array, itemCount: number}>}
     * @throws {Error} "Failed to refresh RSS feed: ..." wrapping any failure,
     *   including an unknown feed name or a generic feed.
     */
    async refreshRSSFeed(feedName) {
        try {
            const loader = this.rssManager.getFeed(feedName);
            if (!loader) {
                throw new Error(`RSS feed not found: ${feedName}`);
            }
            if (!(loader instanceof NaverBlogRSSLoader)) {
                // For generic RSS loader, we would need the URL.
                // This is a limitation of the current design.
                throw new Error("Cannot refresh generic RSS feed without URL. Please use addRSSFeed with the URL again.");
            }

            const documents = await loader.loadBlog();
            if (documents.length === 0) {
                return { documentIds: [], itemCount: 0 };
            }

            const processedDocs = await ingestFeedDocuments(this, documents);
            return {
                documentIds: processedDocs.map((doc) => doc.id),
                itemCount: documents.length,
            };
        }
        catch (error) {
            throw wrapError("Failed to refresh RSS feed", error);
        }
    }

    /**
     * Refreshes every registered feed in turn. A feed that fails is logged and
     * recorded with an empty result; it does not stop the remaining feeds.
     * @returns {Promise<Map<string, {documentIds: Array, itemCount: number}>>}
     *   Results keyed by feed name.
     */
    async refreshAllRSSFeeds() {
        const results = new Map();
        const feedNames = this.rssManager.getFeedNames();
        for (const feedName of feedNames) {
            try {
                const result = await this.refreshRSSFeed(feedName);
                results.set(feedName, result);
            }
            catch (error) {
                console.error(`Failed to refresh RSS feed ${feedName}:`, error);
                results.set(feedName, { documentIds: [], itemCount: 0 });
            }
        }
        return results;
    }

    /** Unregisters a feed; returns the feed manager's `removeFeed` result. */
    removeRSSFeed(feedName) {
        return this.rssManager.removeFeed(feedName);
    }

    /** Returns the names of all registered feeds. */
    getRSSFeedNames() {
        return this.rssManager.getFeedNames();
    }

    /**
     * Describes a registered feed.
     * @returns `{ exists: false, type: null }` for an unknown name;
     *   `{ exists: true, type: "naver", blogId, rssUrl }` for a Naver blog
     *   feed; `{ exists: true, type: "generic" }` otherwise.
     */
    getRSSFeedInfo(feedName) {
        const loader = this.rssManager.getFeed(feedName);
        if (!loader) {
            return { exists: false, type: null };
        }
        if (loader instanceof NaverBlogRSSLoader) {
            return {
                exists: true,
                type: "naver",
                blogId: loader.getBlogId(),
                rssUrl: loader.getRSSUrl(),
            };
        }
        return {
            exists: true,
            type: "generic",
        };
    }

    /** Utility: extracts the blog id from a Naver blog URL via NaverBlogRSSLoader. */
    static extractNaverBlogId(url) {
        return NaverBlogRSSLoader.extractBlogId(url);
    }
}

export { RAGEngine };
//# sourceMappingURL=engine.js.map