UNPKG

@escher-dbai/rag-module

Version:

Enterprise RAG module with chat context storage, vector search, and session management. Complete chat history retrieval and streaming content extraction for Electron apps.

612 lines (542 loc) 18.8 kB
/**
 * ChatContextManager - Main class for real-time chat storage and filtering
 *
 * This module provides real-time prompt/response storage, filtering, and context management
 * for UI integration. Handles the complete flow from user prompt to filtered storage.
 */

// Not referenced anywhere in this file; the require is retained so that module
// loading behaviour stays exactly as before.
const ContextRetrievalService = require('./ContextRetrievalService');
const crypto = require('crypto');
const fs = require('fs').promises;
const path = require('path');

/**
 * Return `fallback` when `value` is null or undefined, otherwise `value`.
 * Unlike `||`, this keeps legitimate falsy values such as `0` or `''`.
 * @param {*} value - Candidate value
 * @param {*} fallback - Value used when the candidate is missing
 * @returns {*} - The candidate or the fallback
 */
function withDefault(value, fallback) {
  return value === undefined || value === null ? fallback : value;
}

/**
 * Concatenate the `data` of every stream event shaped like
 * `{ type: 'content', data: '<non-empty string>' }`; all other events are ignored.
 * @param {Array} events - Parsed stream events
 * @returns {string} - Joined assistant text ('' when no content event exists)
 */
function joinContentEvents(events) {
  const parts = [];
  for (const ev of events) {
    if (ev && ev.type === 'content' && typeof ev.data === 'string' && ev.data.length > 0) {
      parts.push(ev.data);
    }
  }
  return parts.join('');
}

/**
 * Assemble the history object returned by the history parsers.
 * @param {string} contextId - Context ID
 * @param {string} chatTitle - Chat title
 * @param {Array} queries - User messages
 * @param {Array} responses - Assistant messages
 * @param {Array} pairs - Query/response pairs
 * @returns {Object} - History with derived stats
 */
function buildHistory(contextId, chatTitle, queries, responses, pairs) {
  return {
    contextId,
    chatTitle,
    queries,
    responses,
    pairs,
    stats: {
      totalMessages: queries.length + responses.length,
      queryCount: queries.length,
      responseCount: responses.length,
      pairCount: pairs.length,
      retrievedAt: new Date().toISOString()
    }
  };
}

/**
 * Pull every top-level JSON object/array out of a string that is not valid
 * JSON as a whole, e.g. `{"a":1}{"b":2}` or `data: {"a":1}`.
 * Brackets inside JSON string literals are ignored; fragments that still fail
 * to parse are skipped.
 * @param {string} text - Text containing zero or more embedded JSON values
 * @returns {Array} - Parsed values in order of appearance
 */
function extractEmbeddedJsonValues(text) {
  const values = [];
  let depth = 0;
  let start = -1;
  let inString = false;
  let escaped = false;

  for (let i = 0; i < text.length; i++) {
    const ch = text[i];

    if (inString) {
      if (escaped) {
        escaped = false;
      } else if (ch === '\\') {
        escaped = true;
      } else if (ch === '"') {
        inString = false;
      }
      continue;
    }

    // Quotes only matter inside a value; stray quotes in surrounding text
    // must not hide the brackets that follow them.
    if (ch === '"' && depth > 0) {
      inString = true;
      continue;
    }

    if (ch === '{' || ch === '[') {
      if (depth === 0) {
        start = i;
      }
      depth++;
    } else if ((ch === '}' || ch === ']') && depth > 0) {
      depth--;
      if (depth === 0) {
        try {
          values.push(JSON.parse(text.slice(start, i + 1)));
        } catch (fragmentError) {
          // Best-effort extraction: a malformed fragment is dropped.
        }
        start = -1;
      }
    }
  }

  return values;
}

class ChatContextManager {
  /**
   * @param {Object} config - Configuration options
   * @param {string} [config.baseDir] - Base directory for storage (default: './chat-data')
   * @param {string} [config.rawDir] - Directory for raw chat files (default: 'raw')
   * @param {string} [config.contextDir] - Directory for filtered context files (default: 'contexts')
   * @param {number} [config.maxHistory] - Max history items (default: 25)
   * @param {Object} [config.filterConfig] - Filtering configuration; may carry
   *   `systemKeyPrefixes`, `systemMessageRoles` and `systemContentPatterns`.
   *   The same three keys given directly on `config` take precedence.
   */
  constructor(config = {}) {
    // Defaults are applied after the spread so that an option passed as
    // undefined/null falls back to its default instead of overriding it.
    this.config = {
      ...config,
      baseDir: withDefault(config.baseDir, './chat-data'),
      rawDir: withDefault(config.rawDir, 'raw'),
      contextDir: withDefault(config.contextDir, 'contexts'),
      maxHistory: withDefault(config.maxHistory, 25)
    };

    // Initialize filtering configuration.
    // NOTE: no method in this file consults filterConfig; it is only stored.
    const filterOverrides = config.filterConfig || {};
    this.filterConfig = {
      systemKeyPrefixes: config.systemKeyPrefixes ||
        filterOverrides.systemKeyPrefixes ||
        ['cache:', 'system:', 'internal:'],
      systemMessageRoles: config.systemMessageRoles ||
        filterOverrides.systemMessageRoles ||
        [2, 99],
      systemContentPatterns: config.systemContentPatterns ||
        filterOverrides.systemContentPatterns || [
          '=== USER CONTEXT ===',
          '=== USER AWS ESTATE ===',
          '=== SYSTEM INFO ===',
          '[INTERNAL]'
        ]
    };

    // In-memory session storage
    this.activeSessions = new Map(); // sessionId -> sessionData
  }

  /**
   * Start a new chat session and register it in memory. An existing session
   * with the same sessionId is replaced.
   * @param {Object} sessionConfig - Session configuration
   * @param {string} sessionConfig.userId - User ID
   * @param {string} sessionConfig.sessionId - Unique session ID
   * @param {string} [sessionConfig.chatTitle] - Chat title
   * @param {number} [sessionConfig.agentType] - Agent type (default: 7)
   * @returns {Object} - Session information
   */
  async startSession({ userId, sessionId, chatTitle, agentType = 7 }) {
    const contextId = `${userId}-${this.generateContextId()}`;

    const sessionData = {
      userId,
      sessionId,
      contextId,
      chatTitle: chatTitle || `Chat Session ${new Date().toISOString()}`,
      agentType,
      createdAt: new Date().toISOString(),
      responses: [], // Will store prompt/response pairs
      isActive: true
    };

    this.activeSessions.set(sessionId, sessionData);

    return {
      sessionId,
      contextId,
      chatTitle: sessionData.chatTitle,
      createdAt: sessionData.createdAt
    };
  }

  /**
   * Add a user prompt to the active session. The prompt is held as the
   * session's pending prompt until addResponse completes the pair.
   * @param {string} sessionId - Session ID
   * @param {string} prompt - User's prompt
   * @returns {Object} - Prompt information
   * @throws {Error} When the session does not exist
   * @throws {TypeError} When prompt is not a string
   */
  async addPrompt(sessionId, prompt) {
    const session = this.activeSessions.get(sessionId);
    if (!session) {
      throw new Error(`Session not found: ${sessionId}`);
    }
    if (typeof prompt !== 'string') {
      throw new TypeError(`Prompt must be a string for session: ${sessionId}`);
    }

    const promptData = {
      prompt: prompt.trim(),
      timestamp: new Date().toISOString(),
      stage: 'PROMPT_RECEIVED'
    };

    // Add to session (will be completed when response is added)
    session.currentPrompt = promptData;

    return {
      sessionId,
      contextId: session.contextId,
      prompt: promptData.prompt,
      timestamp: promptData.timestamp
    };
  }

  /**
   * Add API response to the session and complete the prompt/response pair,
   * then persist the session (raw and filtered files).
   * @param {string} sessionId - Session ID
   * @param {string} rawResponse - Raw response from API (JSON stream format)
   * @param {Object} [metadata] - Additional metadata, merged into the stored record
   * @returns {Object} - Response information
   * @throws {Error} When the session does not exist, has no pending prompt,
   *   or saving fails
   * @throws {TypeError} When rawResponse is null or undefined
   */
  async addResponse(sessionId, rawResponse, metadata = {}) {
    const session = this.activeSessions.get(sessionId);
    if (!session || !session.currentPrompt) {
      throw new Error(`Session not found or no active prompt: ${sessionId}`);
    }
    if (rawResponse === null || rawResponse === undefined) {
      throw new TypeError(`Raw response is required for session: ${sessionId}`);
    }

    // Complete the prompt/response pair
    const responseData = {
      agentType: session.agentType,
      prompt: session.currentPrompt.prompt,
      raw_response: rawResponse,
      response_length: rawResponse.length,
      stage: 'RAW_RESPONSE_RECEIVED',
      timestamp: new Date().toISOString(),
      ...metadata
    };

    // Add to session responses
    session.responses.push(responseData);

    // Clear current prompt
    delete session.currentPrompt;

    // Auto-save and filter after every completed pair (the list is never
    // empty at this point, so no length check is needed).
    await this.saveAndFilterSession(sessionId);

    return {
      sessionId,
      contextId: session.contextId,
      responseLength: responseData.response_length,
      totalResponses: session.responses.length
    };
  }

  /**
   * Save session data and create filtered version.
   * Writes `<baseDir>/<rawDir>/chat_<contextId>.json` and
   * `<baseDir>/<contextDir>/<contextId>.json`.
   * @param {string} sessionId - Session ID
   * @returns {Object} - Save results
   * @throws {Error} When the session does not exist or any write fails
   */
  async saveAndFilterSession(sessionId) {
    const session = this.activeSessions.get(sessionId);
    if (!session) {
      throw new Error(`Session not found: ${sessionId}`);
    }

    try {
      // Create session data in API format
      const sessionJson = {
        chat_title: session.chatTitle,
        context_id: session.contextId,
        last_updated: new Date().toISOString(),
        responses: session.responses,
        total_responses: session.responses.length,
        user_id: session.userId
      };

      // Save raw session data
      const rawDir = path.join(this.config.baseDir, this.config.rawDir);
      const rawFilePath = path.join(rawDir, `chat_${session.contextId}.json`);

      await fs.mkdir(rawDir, { recursive: true });
      await fs.writeFile(rawFilePath, JSON.stringify(sessionJson, null, 2), 'utf8');

      // Create filtered version
      const contextDir = path.join(this.config.baseDir, this.config.contextDir);
      const contextFilePath = path.join(contextDir, `${session.contextId}.json`);

      const metadata = {
        agent_type: session.agentType,
        chat_title: session.chatTitle,
        context_id: session.contextId,
        user_id: session.userId
      };

      const filteredResponse = await this.saveFilteredResponse({
        records: [sessionJson],
        metadata,
        maxHistory: this.config.maxHistory,
        outputPath: contextFilePath
      });

      return {
        sessionId,
        contextId: session.contextId,
        rawFile: rawFilePath,
        contextFile: contextFilePath,
        totalResponses: session.responses.length,
        filteredMessages: filteredResponse.conversation_state_after_filtering.total_messages
      };
    } catch (error) {
      // `cause` keeps the underlying error reachable for debugging.
      throw new Error(`Failed to save session ${sessionId}: ${error.message}`, { cause: error });
    }
  }

  /**
   * End a chat session and finalize storage. The session is saved one last
   * time and then removed from the in-memory map.
   * @param {string} sessionId - Session ID
   * @returns {Object} - Final session data
   * @throws {Error} When the session does not exist or the final save fails
   */
  async endSession(sessionId) {
    const session = this.activeSessions.get(sessionId);
    if (!session) {
      throw new Error(`Session not found: ${sessionId}`);
    }

    // Final save and filter
    const saveResults = await this.saveAndFilterSession(sessionId);

    // Mark session as inactive
    session.isActive = false;
    session.endedAt = new Date().toISOString();

    // Remove from active sessions
    this.activeSessions.delete(sessionId);

    return {
      ...saveResults,
      endedAt: session.endedAt,
      totalResponses: session.responses.length
    };
  }

  /**
   * Retrieve chat history for a context.
   * Lookup order: filtered context file on disk, then an in-memory active
   * session with the same contextId, then an empty history.
   * @param {string} contextId - Context ID
   * @param {Object} [options] - Retrieval options (currently unused)
   * @returns {Object} - Chat history with query-response pairs
   * @throws {Error} When parsing the active session fails
   */
  async getChatHistory(contextId, options = {}) {
    try {
      // Try to read from the filtered context file first
      const contextFile = path.join(
        this.config.baseDir,
        this.config.contextDir,
        `${contextId}.json`
      );

      try {
        const contextData = JSON.parse(await fs.readFile(contextFile, 'utf8'));
        return this.parseContextFileToHistory(contextData);
      } catch (fileError) {
        // Any failure to read/parse the file falls back to the active session
        const activeSession = Array.from(this.activeSessions.values())
          .find(session => session.contextId === contextId);

        if (activeSession) {
          return this.parseSessionToHistory(activeSession);
        }

        // If no active session, return empty history
        return buildHistory(contextId, '', [], [], []);
      }
    } catch (error) {
      throw new Error(`Failed to retrieve chat history: ${error.message}`, { cause: error });
    }
  }

  /**
   * Parse context file data to history format.
   * Messages with role 0 become queries, role 1 become responses; any other
   * role is skipped. Pairs are formed by position (i-th query with i-th response).
   * @param {Object} contextData - Context file data
   * @returns {Object} - Chat history
   */
  parseContextFileToHistory(contextData) {
    const queries = [];
    const responses = [];
    const pairs = [];

    const context = contextData.final_conversation_context;
    if (context && context.m) {
      const parsedAt = Date.now();
      let sequence = 0;

      // Parse messages from filtered context
      for (const msg of context.m) {
        const message = {
          role: msg.r,
          content: msg.c,
          type: msg.r === 0 ? 'query' : 'response',
          timestamp: contextData.timestamp,
          // The sequence suffix keeps IDs unique even though every message
          // of one parse shares the same millisecond timestamp.
          messageId: `${contextData.context_id}_${msg.r}_${parsedAt}_${sequence}`
        };
        sequence++;

        if (msg.r === 0) {
          queries.push(message);
        } else if (msg.r === 1) {
          responses.push(message);
        }
      }

      // Create pairs
      const pairCount = Math.min(queries.length, responses.length);
      for (let i = 0; i < pairCount; i++) {
        pairs.push({
          pairId: `${contextData.context_id}_pair_${i + 1}`,
          query: queries[i],
          response: responses[i],
          pairIndex: i + 1
        });
      }
    }

    return buildHistory(
      contextData.context_id,
      contextData.chat_title || '',
      queries,
      responses,
      pairs
    );
  }

  /**
   * Parse active session to history format.
   * A turn contributes a query when it has a prompt and a response when its
   * raw stream contains content events. A pair is emitted only when the SAME
   * turn produced both, so a turn without assistant content can never cause a
   * prompt to be matched with a later turn's answer.
   * @param {Object} session - Active session data
   * @returns {Object} - Chat history
   */
  parseSessionToHistory(session) {
    const queries = [];
    const responses = [];
    const pairs = [];
    const parsedAt = Date.now();
    let turnIndex = 0;

    // Process responses from session
    for (const turn of session.responses) {
      let query = null;
      let reply = null;

      // Add prompt as query
      if (turn.prompt) {
        query = {
          role: 0,
          content: turn.prompt,
          type: 'query',
          timestamp: turn.timestamp,
          messageId: `${session.contextId}_0_${parsedAt}_${turnIndex}`
        };
        queries.push(query);
      }

      // Parse raw response for assistant content
      if (turn.raw_response) {
        const content = joinContentEvents(this.parseConcatenatedJson(turn.raw_response));
        if (content.length > 0) {
          reply = {
            role: 1,
            content,
            type: 'response',
            timestamp: turn.timestamp,
            messageId: `${session.contextId}_1_${parsedAt}_${turnIndex}`
          };
          responses.push(reply);
        }
      }

      // Create the pair for this turn
      if (query && reply) {
        const pairIndex = pairs.length + 1;
        pairs.push({
          pairId: `${session.contextId}_pair_${pairIndex}`,
          query,
          response: reply,
          pairIndex
        });
      }

      turnIndex++;
    }

    return buildHistory(session.contextId, session.chatTitle, queries, responses, pairs);
  }

  /**
   * Get active session info
   * @param {string} sessionId - Session ID
   * @returns {Object|null} - Session information, or null when the session is unknown
   */
  getSessionInfo(sessionId) {
    const session = this.activeSessions.get(sessionId);
    if (!session) {
      return null;
    }

    return {
      sessionId,
      contextId: session.contextId,
      userId: session.userId,
      chatTitle: session.chatTitle,
      createdAt: session.createdAt,
      totalResponses: session.responses.length,
      isActive: session.isActive,
      hasCurrentPrompt: !!session.currentPrompt
    };
  }

  /**
   * List all active sessions
   * @returns {Array} - List of active sessions
   */
  getActiveSessions() {
    return Array.from(this.activeSessions.values()).map(session => ({
      sessionId: session.sessionId,
      contextId: session.contextId,
      userId: session.userId,
      chatTitle: session.chatTitle,
      createdAt: session.createdAt,
      totalResponses: session.responses.length,
      isActive: session.isActive
    }));
  }

  /**
   * Generate a unique context ID
   * @returns {string} - UUID (version 4 layout) context ID
   */
  generateContextId() {
    // Prefer the runtime's UUID generator; older runtimes without it fall
    // back to the original Math.random based template.
    if (typeof crypto.randomUUID === 'function') {
      return crypto.randomUUID();
    }

    return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
      const r = (Math.random() * 16) | 0;
      const v = c === 'x' ? r : (r & 0x3) | 0x8;
      return v.toString(16);
    });
  }

  /**
   * Clean up old sessions and files.
   * Placeholder: only the cutoff date is computed; nothing is removed and
   * `filesRemoved` is always 0.
   * @param {Object} [options] - Cleanup options
   * @param {number} [options.olderThanDays] - Remove files older than X days (default: 30)
   * @returns {Object} - Cleanup results
   */
  async cleanup(options = {}) {
    const { olderThanDays = 30 } = options;
    const cutoffDate = new Date(Date.now() - (olderThanDays * 24 * 60 * 60 * 1000));

    // Implementation for cleanup (could be added later)
    return {
      cleanupDate: new Date().toISOString(),
      cutoffDate: cutoffDate.toISOString(),
      filesRemoved: 0 // Placeholder
    };
  }

  /**
   * Parse concatenated JSON strings (inline replacement for UserContextStorage method).
   * Strategy: (1) an already-parsed array/object is passed through;
   * (2) the whole string is tried as one JSON document; (3) otherwise each
   * non-empty line is parsed on its own, and a line that is not valid JSON is
   * scanned for embedded top-level objects/arrays (covers `{"a":1}{"b":2}`
   * and prefixed lines such as `data: {...}`). Unparseable text is skipped.
   * @param {string|Array|Object} jsonString - Concatenated JSON string or array
   * @returns {Array} - Parsed JSON objects
   */
  parseConcatenatedJson(jsonString) {
    if (Array.isArray(jsonString)) {
      return jsonString;
    }
    if (typeof jsonString !== 'string') {
      return jsonString === null || jsonString === undefined ? [] : [jsonString];
    }

    try {
      // First, try to parse as a JSON array (streaming data format)
      const parsed = JSON.parse(jsonString);
      return Array.isArray(parsed) ? parsed : [parsed];
    } catch (wholeError) {
      // Not a single JSON document; fall through to line-by-line parsing.
    }

    const events = [];
    for (const line of jsonString.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed) {
        continue;
      }

      try {
        events.push(JSON.parse(trimmed));
      } catch (lineError) {
        // Recover any JSON values embedded in the line instead of dropping it.
        for (const value of extractEmbeddedJsonValues(trimmed)) {
          events.push(value);
        }
      }
    }

    return events;
  }

  /**
   * Save filtered response (inline replacement for UserContextStorage method).
   * Only `records[0]` is processed. Every turn contributes one user message
   * (the prompt) and one assistant message (the joined content events, which
   * may be an empty string). `processing_results` reflects the last turn.
   * @param {Object} params - Parameters
   * @param {Array} params.records - Records to process
   * @param {Object} params.metadata - Metadata
   * @param {number} params.maxHistory - Max history items (accepted for
   *   interface compatibility; not applied, the full conversation is written)
   * @param {string} params.outputPath - Output file path
   * @returns {Object} - Filtered response data
   * @throws {Error} When no record is supplied or the file cannot be written
   */
  async saveFilteredResponse({ records, metadata, maxHistory, outputPath }) {
    try {
      const record = records[0];
      if (!record) {
        throw new Error('No record provided');
      }

      const nowSeconds = Math.floor(Date.now() / 1000);

      const filteredData = {
        agent_type: metadata.agent_type,
        chat_title: metadata.chat_title,
        context_id: metadata.context_id,
        conversation_state_after_filtering: {
          assistant_messages: 0,
          system_messages_filtered_out: 0,
          total_messages: 0,
          user_messages: 0
        },
        final_conversation_context: {
          i: metadata.context_id,
          a: metadata.agent_type,
          ct: metadata.chat_title,
          m: [],
          t: nowSeconds,
          l: nowSeconds
        },
        note: "This is the cleaned and filtered response that will be stored in Redis, with content/canvas separation and system message filtering applied",
        processing_results: {
          canvas_data: "",
          canvas_data_length: 0,
          canvas_metadata: "",
          canvas_metadata_length: 0,
          clean_response: "",
          clean_response_length: 0
        },
        stage: "FILTERED_RESPONSE_AFTER_CLEANING",
        timestamp: new Date().toISOString(),
        user_id: metadata.user_id
      };

      const messages = filteredData.final_conversation_context.m;
      const counters = filteredData.conversation_state_after_filtering;

      // Process responses to build conversation context
      for (const response of record.responses) {
        // Add user prompt
        messages.push({ r: 0, c: response.prompt });
        counters.user_messages++;
        counters.total_messages++;

        // Process raw response to extract clean content
        const cleanResponse = response.raw_response
          ? joinContentEvents(this.parseConcatenatedJson(response.raw_response))
          : "";

        // Add assistant response
        messages.push({ r: 1, c: cleanResponse });
        counters.assistant_messages++;
        counters.total_messages++;

        // Update processing results
        filteredData.processing_results.clean_response = cleanResponse;
        filteredData.processing_results.clean_response_length = cleanResponse.length;
      }

      // Ensure output directory exists
      await fs.mkdir(path.dirname(outputPath), { recursive: true });

      // Write filtered data to file
      await fs.writeFile(outputPath, JSON.stringify(filteredData, null, 2));

      return filteredData;
    } catch (error) {
      throw new Error(`Failed to save filtered response: ${error.message}`, { cause: error });
    }
  }
}

module.exports = ChatContextManager;