// @escher-dbai/rag-module
// Version:
// Enterprise RAG module with chat context storage, vector search, and session management.
// Complete chat history retrieval and streaming content extraction for Electron apps.
// 612 lines (542 loc) • 18.8 kB
// JavaScript
/**
* ChatContextManager - Main class for real-time chat storage and filtering
*
* This module provides real-time prompt/response storage, filtering, and context management
* for UI integration. Handles the complete flow from user prompt to filtered storage.
*/
const crypto = require('crypto');
const fs = require('fs').promises;
const path = require('path');
const ContextRetrievalService = require('./ContextRetrievalService');
class ChatContextManager {
  /**
   * @param {Object} [config] - Configuration options
   * @param {string} [config.baseDir] - Base directory for storage (default: './chat-data')
   * @param {string} [config.rawDir] - Directory for raw chat files (default: 'raw')
   * @param {string} [config.contextDir] - Directory for filtered context files (default: 'contexts')
   * @param {number} [config.maxHistory] - Max history value handed to saveFilteredResponse (default: 25)
   * @param {Object} [config.filterConfig] - Filtering configuration; may carry
   *   systemKeyPrefixes, systemMessageRoles and systemContentPatterns
   * @param {string[]} [config.systemKeyPrefixes] - Top-level form; wins over filterConfig
   * @param {number[]} [config.systemMessageRoles] - Top-level form; wins over filterConfig
   * @param {string[]} [config.systemContentPatterns] - Top-level form; wins over filterConfig
   */
  constructor(config = {}) {
    const orDefault = (value, fallback) => (value == null ? fallback : value);

    // Defaults are applied after the spread so that a key explicitly passed
    // as `undefined`/`null` cannot erase them.
    this.config = {
      ...config,
      baseDir: orDefault(config.baseDir, './chat-data'),
      rawDir: orDefault(config.rawDir, 'raw'),
      contextDir: orDefault(config.contextDir, 'contexts'),
      maxHistory: orDefault(config.maxHistory, 25)
    };

    // Filtering options are accepted both at the top level of `config` and
    // under the documented `config.filterConfig` key. Top-level values are
    // checked first.
    const nestedFilter = config.filterConfig || {};
    this.filterConfig = {
      systemKeyPrefixes: config.systemKeyPrefixes || nestedFilter.systemKeyPrefixes ||
        ['cache:', 'system:', 'internal:'],
      systemMessageRoles: config.systemMessageRoles || nestedFilter.systemMessageRoles || [2, 99],
      systemContentPatterns: config.systemContentPatterns || nestedFilter.systemContentPatterns || [
        '=== USER CONTEXT ===',
        '=== USER AWS ESTATE ===',
        '=== SYSTEM INFO ===',
        '[INTERNAL]'
      ]
    };

    // In-memory session storage: sessionId -> sessionData
    this.activeSessions = new Map();
  }

  /**
   * Start a new chat session and register it in memory.
   * An existing session stored under the same sessionId is replaced.
   * @param {Object} sessionConfig - Session configuration
   * @param {string} sessionConfig.userId - User ID (becomes the prefix of the context ID)
   * @param {string} sessionConfig.sessionId - Unique session ID
   * @param {string} [sessionConfig.chatTitle] - Chat title
   * @param {number} [sessionConfig.agentType] - Agent type (default: 7)
   * @returns {Promise<Object>} - { sessionId, contextId, chatTitle, createdAt }
   */
  async startSession({ userId, sessionId, chatTitle, agentType = 7 }) {
    const contextId = `${userId}-${this.generateContextId()}`;
    const sessionData = {
      userId,
      sessionId,
      contextId,
      chatTitle: chatTitle || `Chat Session ${new Date().toISOString()}`,
      agentType,
      createdAt: new Date().toISOString(),
      responses: [], // prompt/response pairs, appended by addResponse
      isActive: true
    };
    this.activeSessions.set(sessionId, sessionData);
    return {
      sessionId,
      contextId,
      chatTitle: sessionData.chatTitle,
      createdAt: sessionData.createdAt
    };
  }

  /**
   * Record a user prompt as the session's pending prompt.
   * The prompt is only stored permanently once addResponse completes the pair.
   * @param {string} sessionId - Session ID
   * @param {string} prompt - User's prompt (surrounding whitespace is trimmed)
   * @returns {Promise<Object>} - { sessionId, contextId, prompt, timestamp }
   * @throws {Error} If the session does not exist
   * @throws {TypeError} If prompt is not a string
   */
  async addPrompt(sessionId, prompt) {
    const session = this.activeSessions.get(sessionId);
    if (!session) {
      throw new Error(`Session not found: ${sessionId}`);
    }
    if (typeof prompt !== 'string') {
      throw new TypeError(`Prompt must be a string, received ${typeof prompt}`);
    }
    const promptData = {
      prompt: prompt.trim(),
      timestamp: new Date().toISOString(),
      stage: 'PROMPT_RECEIVED'
    };
    session.currentPrompt = promptData;
    return {
      sessionId,
      contextId: session.contextId,
      prompt: promptData.prompt,
      timestamp: promptData.timestamp
    };
  }

  /**
   * Attach the API response to the pending prompt, then persist the session.
   * @param {string} sessionId - Session ID
   * @param {string} rawResponse - Raw response from API (JSON stream format)
   * @param {Object} [metadata] - Extra fields merged into the stored record; they are
   *   spread last, so same-named keys override the generated ones
   * @returns {Promise<Object>} - { sessionId, contextId, responseLength, totalResponses }
   * @throws {Error} If the session does not exist, has no pending prompt, or saving fails
   * @throws {TypeError} If rawResponse is null or undefined
   */
  async addResponse(sessionId, rawResponse, metadata = {}) {
    const session = this.activeSessions.get(sessionId);
    if (!session || !session.currentPrompt) {
      throw new Error(`Session not found or no active prompt: ${sessionId}`);
    }
    if (rawResponse == null) {
      throw new TypeError('rawResponse is required');
    }
    const responseData = {
      agentType: session.agentType,
      prompt: session.currentPrompt.prompt,
      raw_response: rawResponse,
      response_length: rawResponse.length,
      stage: 'RAW_RESPONSE_RECEIVED',
      timestamp: new Date().toISOString(),
      ...metadata
    };
    session.responses.push(responseData);
    delete session.currentPrompt;

    // The session always holds at least one response here, so persist on every call.
    await this.saveAndFilterSession(sessionId);

    return {
      sessionId,
      contextId: session.contextId,
      responseLength: responseData.response_length,
      totalResponses: session.responses.length
    };
  }

  /**
   * Write the raw session JSON and the filtered context JSON to disk.
   * @param {string} sessionId - Session ID
   * @returns {Promise<Object>} - { sessionId, contextId, rawFile, contextFile,
   *   totalResponses, filteredMessages }
   * @throws {Error} If the session does not exist or any step of saving fails
   */
  async saveAndFilterSession(sessionId) {
    const session = this.activeSessions.get(sessionId);
    if (!session) {
      throw new Error(`Session not found: ${sessionId}`);
    }
    try {
      // Resolve both targets up front so an unsafe context ID fails before anything is written.
      const rawFilePath = this.resolveStoragePath(this.config.rawDir, `chat_${session.contextId}.json`);
      const contextFilePath = this.resolveStoragePath(this.config.contextDir, `${session.contextId}.json`);

      // Session data in API format
      const sessionJson = {
        chat_title: session.chatTitle,
        context_id: session.contextId,
        last_updated: new Date().toISOString(),
        responses: session.responses,
        total_responses: session.responses.length,
        user_id: session.userId
      };

      await fs.mkdir(path.dirname(rawFilePath), { recursive: true });
      await fs.writeFile(rawFilePath, JSON.stringify(sessionJson, null, 2), 'utf8');

      const metadata = {
        agent_type: session.agentType,
        chat_title: session.chatTitle,
        context_id: session.contextId,
        user_id: session.userId
      };
      const filteredResponse = await this.saveFilteredResponse({
        records: [sessionJson],
        metadata,
        maxHistory: this.config.maxHistory,
        outputPath: contextFilePath
      });

      return {
        sessionId,
        contextId: session.contextId,
        rawFile: rawFilePath,
        contextFile: contextFilePath,
        totalResponses: session.responses.length,
        filteredMessages: filteredResponse.conversation_state_after_filtering.total_messages
      };
    } catch (error) {
      throw new Error(`Failed to save session ${sessionId}: ${error.message}`, { cause: error });
    }
  }

  /**
   * Persist a session one last time and drop it from the active set.
   * If the final save fails the session stays active.
   * @param {string} sessionId - Session ID
   * @returns {Promise<Object>} - Save results plus endedAt and totalResponses
   * @throws {Error} If the session does not exist or saving fails
   */
  async endSession(sessionId) {
    const session = this.activeSessions.get(sessionId);
    if (!session) {
      throw new Error(`Session not found: ${sessionId}`);
    }
    const saveResults = await this.saveAndFilterSession(sessionId);
    session.isActive = false;
    session.endedAt = new Date().toISOString();
    this.activeSessions.delete(sessionId);
    return {
      ...saveResults,
      endedAt: session.endedAt,
      totalResponses: session.responses.length
    };
  }

  /**
   * Retrieve chat history for a context.
   * The filtered context file is read first. If it cannot be used, an active
   * in-memory session with the same context ID is used instead. A missing file
   * with no active session yields an empty history; any other read/parse
   * failure with no active session is reported as an error.
   * @param {string} contextId - Context ID
   * @param {Object} [options] - Retrieval options (currently unused)
   * @returns {Promise<Object>} - Chat history with query-response pairs
   * @throws {Error} If the context ID is not a safe file name or the file is unreadable
   */
  async getChatHistory(contextId, options = {}) {
    try {
      const contextFile = this.resolveStoragePath(this.config.contextDir, `${contextId}.json`);
      let fileError;
      try {
        const contextData = JSON.parse(await fs.readFile(contextFile, 'utf8'));
        return this.parseContextFileToHistory(contextData);
      } catch (error) {
        fileError = error;
      }

      const activeSession = Array.from(this.activeSessions.values())
        .find(session => session.contextId === contextId);
      if (activeSession) {
        return this.parseSessionToHistory(activeSession);
      }

      // Only "file does not exist" means "no history yet"; a corrupt or
      // unreadable file must not be mistaken for an empty conversation.
      if (!fileError || fileError.code !== 'ENOENT') {
        throw fileError;
      }
      return this.buildHistory(contextId, '', [], [], []);
    } catch (error) {
      throw new Error(`Failed to retrieve chat history: ${error.message}`, { cause: error });
    }
  }

  /**
   * Convert a filtered context file into the history format.
   * Messages with role 0 are queries, role 1 are responses; other roles are ignored.
   * Pairs are formed by position (first query with first response, and so on).
   * @param {Object} contextData - Parsed context file data
   * @returns {Object} - Chat history
   */
  parseContextFileToHistory(contextData) {
    const queries = [];
    const responses = [];
    const pairs = [];
    const context = contextData.final_conversation_context;
    const messages = context && Array.isArray(context.m) ? context.m : [];
    const stamp = Date.now();

    messages.forEach((msg, index) => {
      if (!msg) {
        return;
      }
      const message = {
        role: msg.r,
        content: msg.c,
        type: msg.r === 0 ? 'query' : 'response',
        timestamp: contextData.timestamp,
        // The position suffix keeps IDs unique when several messages share a role and millisecond.
        messageId: `${contextData.context_id}_${msg.r}_${stamp}_${index}`
      };
      if (msg.r === 0) {
        queries.push(message);
      } else if (msg.r === 1) {
        responses.push(message);
      }
    });

    const pairCount = Math.min(queries.length, responses.length);
    for (let i = 0; i < pairCount; i++) {
      pairs.push({
        pairId: `${contextData.context_id}_pair_${i + 1}`,
        query: queries[i],
        response: responses[i],
        pairIndex: i + 1
      });
    }

    return this.buildHistory(
      contextData.context_id,
      contextData.chat_title || '',
      queries,
      responses,
      pairs
    );
  }

  /**
   * Convert an in-memory session into the history format.
   * A pair is emitted only when the same exchange has both a prompt and
   * non-empty response content, so a prompt is never matched with the answer
   * to a different prompt.
   * @param {Object} session - Active session data
   * @returns {Object} - Chat history
   */
  parseSessionToHistory(session) {
    const queries = [];
    const responses = [];
    const pairs = [];
    const stamp = Date.now();

    session.responses.forEach((exchange, index) => {
      let query = null;
      let answer = null;

      if (exchange.prompt) {
        query = {
          role: 0,
          content: exchange.prompt,
          type: 'query',
          timestamp: exchange.timestamp,
          messageId: `${session.contextId}_0_${stamp}_${index}`
        };
        queries.push(query);
      }

      const content = this.extractContent(exchange.raw_response);
      if (content.length > 0) {
        answer = {
          role: 1,
          content,
          type: 'response',
          timestamp: exchange.timestamp,
          messageId: `${session.contextId}_1_${stamp}_${index}`
        };
        responses.push(answer);
      }

      if (query && answer) {
        const pairIndex = pairs.length + 1;
        pairs.push({
          pairId: `${session.contextId}_pair_${pairIndex}`,
          query,
          response: answer,
          pairIndex
        });
      }
    });

    return this.buildHistory(session.contextId, session.chatTitle, queries, responses, pairs);
  }

  /**
   * Get active session info
   * @param {string} sessionId - Session ID
   * @returns {Object|null} - Session summary, or null when the session is not active
   */
  getSessionInfo(sessionId) {
    const session = this.activeSessions.get(sessionId);
    if (!session) {
      return null;
    }
    return {
      sessionId,
      contextId: session.contextId,
      userId: session.userId,
      chatTitle: session.chatTitle,
      createdAt: session.createdAt,
      totalResponses: session.responses.length,
      isActive: session.isActive,
      hasCurrentPrompt: !!session.currentPrompt
    };
  }

  /**
   * List all active sessions
   * @returns {Array} - One summary object per active session
   */
  getActiveSessions() {
    return Array.from(this.activeSessions.values()).map(session => ({
      sessionId: session.sessionId,
      contextId: session.contextId,
      userId: session.userId,
      chatTitle: session.chatTitle,
      createdAt: session.createdAt,
      totalResponses: session.responses.length,
      isActive: session.isActive
    }));
  }

  /**
   * Generate a unique context ID in UUID v4 layout.
   * Uses crypto.randomUUID() when the runtime provides it and falls back to a
   * Math.random based generator otherwise.
   * @returns {string} - UUID-like context ID
   */
  generateContextId() {
    // `typeof` on an undeclared identifier is safe, so this also works when
    // no `crypto` binding is in scope.
    if (typeof crypto !== 'undefined' && crypto && typeof crypto.randomUUID === 'function') {
      return crypto.randomUUID();
    }
    return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
      const r = Math.random() * 16 | 0;
      const v = c === 'x' ? r : (r & 0x3 | 0x8);
      return v.toString(16);
    });
  }

  /**
   * Clean up old sessions and files.
   * NOTE: placeholder - it only computes the cutoff date; no files are removed yet.
   * @param {Object} [options] - Cleanup options
   * @param {number} [options.olderThanDays] - Age threshold in days (default: 30)
   * @returns {Promise<Object>} - { cleanupDate, cutoffDate, filesRemoved } (filesRemoved is always 0)
   */
  async cleanup(options = {}) {
    const { olderThanDays = 30 } = options;
    const cutoffDate = new Date(Date.now() - (olderThanDays * 24 * 60 * 60 * 1000));
    return {
      cleanupDate: new Date().toISOString(),
      cutoffDate: cutoffDate.toISOString(),
      filesRemoved: 0 // Placeholder
    };
  }

  /**
   * Parse a raw streaming payload into a list of JSON values.
   * Accepted inputs, tried in this order:
   *   1. an array that is already parsed (returned as-is);
   *   2. a string holding one JSON document (an array is returned as-is,
   *      anything else is wrapped in a one-element array);
   *   3. newline-delimited JSON, one value per line;
   *   4. several JSON objects/arrays concatenated on one line, e.g. `{...}{...}`.
   * Fragments that cannot be parsed are skipped.
   * @param {string|Array} jsonString - Concatenated JSON string or array
   * @returns {Array} - Parsed JSON values
   */
  parseConcatenatedJson(jsonString) {
    if (Array.isArray(jsonString)) {
      return jsonString;
    }
    if (typeof jsonString !== 'string') {
      return jsonString == null ? [] : [jsonString];
    }

    try {
      const parsed = JSON.parse(jsonString);
      return Array.isArray(parsed) ? parsed : [parsed];
    } catch (wholeError) {
      // Not a single JSON document; fall through to per-line parsing.
    }

    const events = [];
    for (const line of jsonString.split('\n')) {
      const trimmed = line.trim();
      if (!trimmed) {
        continue;
      }
      try {
        events.push(JSON.parse(trimmed));
      } catch (lineError) {
        // The line may hold several values back to back; recover what can be parsed.
        for (const value of this.splitConcatenatedJson(trimmed)) {
          events.push(value);
        }
      }
    }
    return events;
  }

  /**
   * Extract every top-level JSON object/array found in a text fragment.
   * Brackets inside JSON string literals are ignored; text outside any
   * object/array and fragments that fail to parse are skipped.
   * @param {string} text - Text possibly containing back-to-back JSON values
   * @returns {Array} - Parsed values in order of appearance
   */
  splitConcatenatedJson(text) {
    const values = [];
    let depth = 0;
    let start = -1;
    let inString = false;
    let escaped = false;

    for (let i = 0; i < text.length; i++) {
      const ch = text[i];
      if (inString) {
        if (escaped) {
          escaped = false;
        } else if (ch === '\\') {
          escaped = true;
        } else if (ch === '"') {
          inString = false;
        }
        continue;
      }
      if (ch === '"') {
        // Quotes only matter once we are inside a candidate value.
        if (depth > 0) {
          inString = true;
        }
        continue;
      }
      if (ch === '{' || ch === '[') {
        if (depth === 0) {
          start = i;
        }
        depth++;
      } else if ((ch === '}' || ch === ']') && depth > 0) {
        depth--;
        if (depth === 0) {
          try {
            values.push(JSON.parse(text.slice(start, i + 1)));
          } catch (fragmentError) {
            // Malformed fragment: skip it, matching the best-effort line parsing.
          }
          start = -1;
        }
      }
    }
    return values;
  }

  /**
   * Join the `data` of all `content` events in a raw response.
   * @param {string|Array} rawResponse - Raw response payload
   * @returns {string} - Concatenated content, '' when there is none
   */
  extractContent(rawResponse) {
    if (!rawResponse) {
      return '';
    }
    return this.parseConcatenatedJson(rawResponse)
      .filter(ev => ev && ev.type === 'content' && typeof ev.data === 'string' && ev.data.length > 0)
      .map(ev => ev.data)
      .join('');
  }

  /**
   * Build the path of a storage file and refuse names that contain path
   * components, so an ID such as `../x` cannot address files outside the
   * storage directory.
   * @param {string} subDir - Directory below config.baseDir
   * @param {string} fileName - Plain file name (no directory separators)
   * @returns {string} - Joined path
   * @throws {Error} If fileName is not a plain file name
   */
  resolveStoragePath(subDir, fileName) {
    if (typeof fileName !== 'string' || fileName.length === 0 || path.basename(fileName) !== fileName) {
      throw new Error(`Unsafe storage file name: ${fileName}`);
    }
    return path.join(this.config.baseDir, subDir, fileName);
  }

  /**
   * Assemble the history object returned by the history methods.
   * @param {string} contextId - Context ID
   * @param {string} chatTitle - Chat title
   * @param {Array} queries - Query messages
   * @param {Array} responses - Response messages
   * @param {Array} pairs - Query/response pairs
   * @returns {Object} - { contextId, chatTitle, queries, responses, pairs, stats }
   */
  buildHistory(contextId, chatTitle, queries, responses, pairs) {
    return {
      contextId,
      chatTitle,
      queries,
      responses,
      pairs,
      stats: {
        totalMessages: queries.length + responses.length,
        queryCount: queries.length,
        responseCount: responses.length,
        pairCount: pairs.length,
        retrievedAt: new Date().toISOString()
      }
    };
  }

  /**
   * Build the filtered conversation context from the first record and write it to disk.
   * Every stored exchange contributes one user message (r: 0) and one
   * assistant message (r: 1, possibly empty). `processing_results` reflects
   * the last exchange only.
   * @param {Object} params - Parameters
   * @param {Array} params.records - Records to process (only records[0] is used)
   * @param {Object} params.metadata - agent_type, chat_title, context_id, user_id
   * @param {number} params.maxHistory - Max history items. NOTE: accepted but not
   *   currently applied; the full conversation is written
   * @param {string} params.outputPath - Output file path
   * @returns {Promise<Object>} - Filtered response data as written
   * @throws {Error} If the record is missing or the file cannot be written
   */
  async saveFilteredResponse({ records, metadata, maxHistory, outputPath }) {
    try {
      const record = Array.isArray(records) ? records[0] : undefined;
      if (!record || !Array.isArray(record.responses)) {
        throw new Error('No record with responses to filter');
      }

      // One clock read so creation and last-update stamps cannot differ.
      const nowSeconds = Math.floor(Date.now() / 1000);
      const counters = {
        assistant_messages: 0,
        system_messages_filtered_out: 0,
        total_messages: 0,
        user_messages: 0
      };
      const messages = [];
      const filteredData = {
        agent_type: metadata.agent_type,
        chat_title: metadata.chat_title,
        context_id: metadata.context_id,
        conversation_state_after_filtering: counters,
        final_conversation_context: {
          i: metadata.context_id,
          a: metadata.agent_type,
          ct: metadata.chat_title,
          m: messages,
          t: nowSeconds,
          l: nowSeconds
        },
        note: "This is the cleaned and filtered response that will be stored in Redis, with content/canvas separation and system message filtering applied",
        processing_results: {
          canvas_data: "",
          canvas_data_length: 0,
          canvas_metadata: "",
          canvas_metadata_length: 0,
          clean_response: "",
          clean_response_length: 0
        },
        stage: "FILTERED_RESPONSE_AFTER_CLEANING",
        timestamp: new Date().toISOString(),
        user_id: metadata.user_id
      };

      for (const response of record.responses) {
        messages.push({ r: 0, c: response.prompt });
        counters.user_messages++;
        counters.total_messages++;

        const cleanResponse = this.extractContent(response.raw_response);
        messages.push({ r: 1, c: cleanResponse });
        counters.assistant_messages++;
        counters.total_messages++;

        filteredData.processing_results.clean_response = cleanResponse;
        filteredData.processing_results.clean_response_length = cleanResponse.length;
      }

      await fs.mkdir(path.dirname(outputPath), { recursive: true });
      await fs.writeFile(outputPath, JSON.stringify(filteredData, null, 2), 'utf8');
      return filteredData;
    } catch (error) {
      throw new Error(`Failed to save filtered response: ${error.message}`, { cause: error });
    }
  }
}
module.exports = ChatContextManager;