UNPKG

alphe-redis-mcp-server

Version:

The most comprehensive Redis MCP Server for Alphe.AI - Optimized for sub-5 second response times with multi-layer caching

693 lines (601 loc) 20.6 kB
import { createHash } from 'node:crypto';

import type { AlpheRedisClient } from '../core/redis-client.js';

// Cognitive Agent Configuration

/** Static description of one model-backed agent used by the orchestrator. */
export interface CognitiveAgent {
  name: string;
  model: string;
  role: 'perception' | 'context' | 'planning' | 'reasoning' | 'reflection' | 'orchestrator';
  /** URL that receives the POSTed generation request. */
  endpoint: string;
  /** Maximum number of queued tasks taken per background drain pass. */
  maxConcurrency: number;
  /** Configured latency figure (ms); only reported by `getSystemStatus`. */
  averageLatency: number;
  costPerToken: number;
}

/** One unit of work in the cognitive pipeline; mutated in place as it progresses. */
export interface CognitiveTask {
  id: string;
  type: 'perception' | 'context_engineering' | 'planning' | 'reasoning' | 'reflection';
  input: any;
  priority: number;
  /** `Date.now()`-based timestamp. It is recorded but not read anywhere in this file. */
  deadline: number;
  /** Ids of tasks that must reach `completed` before this one is executed. */
  dependencies: string[];
  assignedAgent?: string;
  status: 'pending' | 'processing' | 'completed' | 'failed';
  result?: any;
  error?: string;
  startTime?: number;
  endTime?: number;
}

/**
 * Runs a user query through a set of role-specific model agents (perception,
 * context engineering, planning, reasoning), then asks an orchestrator agent
 * to synthesize a final response. Intermediate and final results are cached
 * through the injected Redis client.
 */
export class CognitiveOrchestrator {
  /** Upper bound (ms) for each independent task before the pipeline moves on. */
  private static readonly INDEPENDENT_TASK_TIMEOUT_MS = 1000;

  /** Task type -> agent role. Unknown task types are routed to the orchestrator. */
  private static readonly TASK_ROLES = new Map<string, CognitiveAgent['role']>([
    ['perception', 'perception'],
    ['context_engineering', 'context'],
    ['planning', 'planning'],
    ['reasoning', 'reasoning'],
    ['reflection', 'reflection'],
  ]);

  /** Per-role prompt pieces: opening sentence, list heading, numbered list entries. */
  private static readonly PROMPT_SPECS = new Map<
    string,
    { lead: string; heading: string; items: readonly string[] }
  >([
    [
      'perception',
      {
        lead: 'Analyze this user query and extract key information:',
        heading: 'Extract:',
        items: [
          'Intent (what user wants)',
          'Entities (important objects/names)',
          'Urgency (1-10)',
          'Domain (tech, general, creative, etc.)',
          'Expected response type',
        ],
      },
    ],
    [
      'context',
      {
        lead: 'Optimize this query for better AI processing:',
        heading: 'Provide:',
        items: [
          'Rewritten query (clearer, more specific)',
          'Missing context needed',
          'Suggested follow-up questions',
          'Key terms to emphasize',
          'Processing strategy',
        ],
      },
    ],
    [
      'planning',
      {
        lead: 'Create an execution plan for this query:',
        heading: 'Plan:',
        items: [
          'Step-by-step approach',
          'Required resources/tools',
          'Expected challenges',
          'Success criteria',
          'Fallback options',
        ],
      },
    ],
    [
      'reasoning',
      {
        lead: 'Apply logical reasoning to this query:',
        heading: 'Reasoning:',
        items: [
          'Analysis of the problem',
          'Logical deductions',
          'Evidence/assumptions',
          'Confidence level (1-10)',
          'Alternative perspectives',
        ],
      },
    ],
    [
      'reflection',
      {
        lead: 'Reflect on and improve this AI interaction:',
        heading: 'Reflection:',
        items: [
          'Response quality assessment',
          'Potential improvements',
          'User satisfaction prediction',
          'Learning opportunities',
          'Optimization suggestions',
        ],
      },
    ],
  ]);

  private redisClient: AlpheRedisClient;
  private agents: Map<string, CognitiveAgent>;
  private activeTasks: Map<string, CognitiveTask>;
  private agentQueues: Map<string, CognitiveTask[]>;
  private isProcessing: boolean = false;

  /** Interval handles created by `startBackgroundProcessing`, cleared by `shutdown`. */
  private backgroundTimers: Array<ReturnType<typeof setInterval>> = [];
  /** True while a queue drain pass is running, so one-second ticks never overlap. */
  private drainingQueues = false;
  /** Disambiguates session ids created within the same millisecond. */
  private sessionCounter = 0;

  // Free model configuration matching your setup
  private readonly FREE_AGENTS: CognitiveAgent[] = [
    {
      name: 'perception_agent',
      model: 'gemma2-9b',
      role: 'perception',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 3,
      averageLatency: 200, // ms
      costPerToken: 0 // free
    },
    {
      name: 'context_engineer',
      model: 'phi-3-mini',
      role: 'context',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 4,
      averageLatency: 150,
      costPerToken: 0
    },
    {
      name: 'planning_agent',
      model: 'qwq-32b',
      role: 'planning',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 2,
      averageLatency: 500,
      costPerToken: 0
    },
    {
      name: 'reasoning_agent',
      model: 'deepseek-r1',
      role: 'reasoning',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 2,
      averageLatency: 400,
      costPerToken: 0
    },
    {
      name: 'reflection_agent',
      model: 'llama-3.3-70b',
      role: 'reflection',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 1,
      averageLatency: 800,
      costPerToken: 0
    },
    {
      name: 'orchestrator_agent',
      model: 'mixtral-8x7b',
      role: 'orchestrator',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 2,
      averageLatency: 300,
      costPerToken: 0
    }
  ];

  /**
   * @param redisClient Cache/storage client used for task snapshots and result caching.
   */
  constructor(redisClient: AlpheRedisClient) {
    this.redisClient = redisClient;
    this.agents = new Map();
    this.activeTasks = new Map();
    this.agentQueues = new Map();

    // Initialize agents
    for (const agent of this.FREE_AGENTS) {
      this.agents.set(agent.name, agent);
      this.agentQueues.set(agent.name, []);
    }
  }

  /**
   * Process a user query through the cognitive pipeline.
   *
   * @param query   The user's query text.
   * @param context Optional caller-supplied context, forwarded to every agent prompt.
   * @returns The synthesized response, the per-stage results, the measured
   *          wall-clock latency (ms) and details of the final-response cache lookup.
   * @throws If the Redis client rejects or the orchestrator agent is not registered.
   */
  async processQuery(query: string, context?: any): Promise<{
    response: string;
    cognitiveInsights: any;
    latency: number;
    cachingStrategy: any;
  }> {
    const startTime = performance.now();
    // The counter keeps ids unique when two queries start in the same millisecond.
    this.sessionCounter += 1;
    const sessionId = `session_${Date.now()}_${this.sessionCounter}`;

    console.log('🧠 Starting cognitive processing pipeline...');

    // Create cognitive tasks that run in parallel
    const tasks: CognitiveTask[] = [
      {
        id: `perception_${sessionId}`,
        type: 'perception',
        input: { query, context },
        priority: 1,
        deadline: Date.now() + 500, // 500ms deadline
        dependencies: [],
        status: 'pending'
      },
      {
        id: `context_${sessionId}`,
        type: 'context_engineering',
        input: { query, context },
        priority: 1,
        deadline: Date.now() + 300, // 300ms deadline
        dependencies: [],
        status: 'pending'
      },
      {
        id: `planning_${sessionId}`,
        type: 'planning',
        input: { query, context },
        priority: 2,
        deadline: Date.now() + 800,
        dependencies: [`perception_${sessionId}`],
        status: 'pending'
      },
      {
        id: `reasoning_${sessionId}`,
        type: 'reasoning',
        input: { query, context },
        priority: 2,
        deadline: Date.now() + 600,
        dependencies: [`context_${sessionId}`],
        status: 'pending'
      }
    ];

    for (const task of tasks) {
      this.activeTasks.set(task.id, task);
    }

    try {
      // Store tasks in Redis for distributed processing. The writes are
      // independent of each other, so they are issued together.
      await Promise.all(
        tasks.map(task =>
          this.redisClient.set(`cognitive_task:${task.id}`, task, {
            ttl: 60,
            namespace: 'cognitive'
          })
        )
      );

      // Process all tasks in parallel
      const results = await this.executeTasksInParallel(tasks);

      // Generate final response using orchestrator
      const finalResponse = await this.generateFinalResponse(query, results);

      const totalLatency = performance.now() - startTime;
      console.log(`✅ Cognitive processing completed in ${totalLatency.toFixed(2)}ms`);

      return {
        response: finalResponse.response,
        cognitiveInsights: {
          perceptionResult: results.perception,
          contextEngineering: results.context_engineering,
          planningResult: results.planning,
          reasoningResult: results.reasoning,
          processingLatency: totalLatency,
          agentsUsed: Object.keys(results),
          cachingUsed: finalResponse.fromCache
        },
        latency: totalLatency,
        cachingStrategy: {
          queryHash: finalResponse.queryHash,
          cacheHit: finalResponse.fromCache,
          cacheLevel: finalResponse.cacheLevel,
          nextPredict: finalResponse.predictions
        }
      };
    } finally {
      // Without this the map grows by four entries per query, forever.
      for (const task of tasks) {
        this.activeTasks.delete(task.id);
      }
    }
  }

  /**
   * Reject with `Error('Task timeout')` if `work` has not settled within `ms`.
   * The timer is always cleared so it cannot keep the event loop alive.
   * `work` itself is not cancelled; it keeps running in the background.
   */
  private withTimeout<T>(work: Promise<T>, ms: number): Promise<T> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => reject(new Error('Task timeout')), ms);
    });
    return Promise.race([work, timeout]).finally(() => clearTimeout(timer));
  }

  /**
   * Execute cognitive tasks in two waves: tasks without dependencies first
   * (each bounded by a timeout), then tasks whose dependencies all completed.
   *
   * @returns An object keyed by task type. A dependent task whose dependencies
   *          did not complete gets `{ fallback: true, reason: 'dependencies_not_met' }`;
   *          a task that failed has no entry.
   */
  private async executeTasksInParallel(tasks: CognitiveTask[]): Promise<any> {
    const results: any = {};
    const startTime = Date.now();

    // Separate tasks by dependencies
    const independentTasks = tasks.filter(t => t.dependencies.length === 0);
    const dependentTasks = tasks.filter(t => t.dependencies.length > 0);

    console.log(`🚀 Starting ${independentTasks.length} independent cognitive tasks...`);

    const runIndependent = async (task: CognitiveTask): Promise<void> => {
      const result = await this.executeTask(task);
      results[task.type] = result;
      task.status = 'completed';
    };

    // allSettled (not all): one failed or slow task must not cut short the
    // wait for its siblings, otherwise their dependents would be skipped even
    // though the sibling would have finished in time.
    const outcomes = await Promise.allSettled(
      independentTasks.map(task =>
        this.withTimeout(
          runIndependent(task),
          CognitiveOrchestrator.INDEPENDENT_TASK_TIMEOUT_MS
        )
      )
    );
    if (outcomes.some(outcome => outcome.status === 'rejected')) {
      console.warn('⚠️ Some independent tasks timed out, continuing with available results');
    }

    console.log(`⚡ Independent tasks completed, starting ${dependentTasks.length} dependent tasks...`);

    // Now execute dependent tasks
    const dependentPromises = dependentTasks.map(async (task) => {
      // Check if dependencies are met
      const dependenciesMet = task.dependencies.every(
        depId => this.activeTasks.get(depId)?.status === 'completed'
      );

      if (!dependenciesMet) {
        console.warn(`⏳ Task ${task.id} dependencies not met, using fallback`);
        results[task.type] = { fallback: true, reason: 'dependencies_not_met' };
        return;
      }

      try {
        const result = await this.executeTask(task);
        results[task.type] = result;
        task.status = 'completed';
      } catch (error) {
        console.error(`❌ Task ${task.id} failed:`, error);
        task.status = 'failed';
        task.error = error instanceof Error ? error.message : 'Unknown error';
      }
    });

    await Promise.allSettled(dependentPromises);

    const totalLatency = Date.now() - startTime;
    console.log(`🎯 All cognitive tasks completed in ${totalLatency}ms`);

    return results;
  }

  /**
   * Execute one cognitive task: pick an agent, try the result cache, otherwise
   * call the agent and cache what it produced.
   *
   * @throws If no agent serves the task type, or the Redis client rejects.
   *         Agent HTTP failures do not throw; they yield a fallback result.
   */
  private async executeTask(task: CognitiveTask): Promise<any> {
    const agent = this.getOptimalAgent(task.type);
    if (!agent) {
      throw new Error(`No agent available for task type: ${task.type}`);
    }

    const startedAt = Date.now();
    task.assignedAgent = agent.name;
    task.status = 'processing';
    task.startTime = startedAt;

    console.log(`🤖 ${agent.name} (${agent.model}) processing ${task.type} task...`);

    try {
      // Check cache first
      // NOTE(review): the read passes no namespace while the write below uses
      // 'cognitive'. Confirm against AlpheRedisClient that both address the same key.
      const cacheKey = `cognitive:${task.type}:${JSON.stringify(task.input)}`;
      const cachedResult = await this.redisClient.get(cacheKey);

      if (cachedResult.success && cachedResult.data) {
        console.log(`⚡ Cache hit for ${task.type} task`);
        const hit = {
          ...cachedResult.data,
          cached: true,
          latency: 5 // Near-zero latency for cached results
        };
        task.endTime = Date.now();
        task.result = hit;
        return hit;
      }

      // Execute the cognitive task
      const result = await this.callCognitiveAgent(agent, task);

      // Cache the result for future queries. Fallback results stand in for a
      // failed agent call; caching them would keep serving the canned answer
      // for five minutes after the agent recovers.
      if (!result.fallback) {
        await this.redisClient.set(cacheKey, result, {
          ttl: 300, // 5 minutes cache
          namespace: 'cognitive',
          tags: [task.type, agent.model]
        });
      }

      task.endTime = Date.now();
      task.result = result;

      console.log(`✅ ${agent.name} completed ${task.type} in ${task.endTime - startedAt}ms`);

      return result;
    } catch (error) {
      task.status = 'failed';
      task.error = error instanceof Error ? error.message : 'Unknown error';
      task.endTime = Date.now();
      throw error;
    }
  }

  /**
   * POST the role-specific prompt to the agent's endpoint (2 second timeout).
   *
   * @returns `{ content, model, tokens, latency, cached }` on success. On any
   *          failure (network, timeout, non-2xx) it does not throw; it returns
   *          the same shape with canned content plus `fallback: true` and `error`.
   */
  private async callCognitiveAgent(agent: CognitiveAgent, task: CognitiveTask): Promise<any> {
    const prompt = this.generateOptimizedPrompt(agent.role, task.input);

    try {
      const response = await fetch(agent.endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: agent.model,
          prompt: prompt,
          stream: false,
          options: {
            temperature: agent.role === 'reasoning' ? 0.1 : 0.3,
            top_p: 0.9,
            max_tokens: agent.role === 'reflection' ? 1000 : 500,
            num_predict: agent.role === 'context' ? 200 : 400
          }
        }),
        signal: AbortSignal.timeout(2000) // 2 second timeout
      });

      if (!response.ok) {
        throw new Error(`Agent ${agent.name} returned ${response.status}`);
      }

      const data = await response.json();

      return {
        content: data.response || '',
        model: agent.model,
        tokens: data.eval_count || 0,
        latency: data.total_duration ? Math.round(data.total_duration / 1000000) : 0, // ns to ms
        cached: false
      };
    } catch (error) {
      console.error(`❌ Agent ${agent.name} failed:`, error);

      // Return fallback response to keep system working
      return {
        content: this.getFallbackResponse(agent.role, task.input),
        model: agent.model,
        tokens: 0,
        latency: 0,
        cached: false,
        fallback: true,
        error: error instanceof Error ? error.message : 'Unknown error'
      };
    }
  }

  /**
   * Build the prompt for an agent role.
   *
   * Prompt sections are joined with single spaces, matching the prompt text as
   * it appears in this file.
   *
   * @param role  Agent role; unknown roles get a generic "Process this query" prompt.
   * @param input Task input. Roles read `query` and `context`; the orchestrator
   *              synthesis step additionally supplies `cognitiveResults` and
   *              `instruction`.
   */
  private generateOptimizedPrompt(role: string, input: any): string {
    const { query, context } = input;
    const contextJson = JSON.stringify(context || {});

    // The synthesis step must actually see the insights it is asked to
    // combine; the generic prompt below only carries query and context.
    if (role === 'orchestrator' && input.cognitiveResults !== undefined) {
      const instruction =
        typeof input.instruction === 'string' && input.instruction.length > 0
          ? input.instruction
          : 'Synthesize all cognitive insights into a comprehensive response';
      return [
        `${instruction}.`,
        `Query: "${query}"`,
        `Cognitive insights: ${JSON.stringify(input.cognitiveResults)}`
      ].join(' ');
    }

    const spec = CognitiveOrchestrator.PROMPT_SPECS.get(role);
    if (!spec) {
      return `Process this query: "${query}" with context: ${contextJson}`;
    }

    return [
      spec.lead,
      `Query: "${query}"`,
      `Context: ${contextJson}`,
      spec.heading,
      ...spec.items.map((item, index) => `${index + 1}. ${item}`),
      'Respond in JSON format only.'
    ].join(' ');
  }

  /**
   * Pick the agent that serves `taskType`, preferring the shortest queue.
   * On a tie the agent registered first wins.
   *
   * @returns The chosen agent, or `null` when no agent has the matching role.
   */
  private getOptimalAgent(taskType: string): CognitiveAgent | null {
    const wantedRole = CognitiveOrchestrator.TASK_ROLES.get(taskType) ?? 'orchestrator';

    let best: CognitiveAgent | null = null;
    let bestLoad = Number.POSITIVE_INFINITY;
    for (const agent of this.agents.values()) {
      if (agent.role !== wantedRole) continue;
      const load = this.agentQueues.get(agent.name)?.length ?? 0;
      if (load < bestLoad) {
        best = agent;
        bestLoad = load;
      }
    }
    return best;
  }

  /**
   * Derive the final-response cache key material: the first 32 hex characters
   * of SHA-256 over the full query and the insights' stable content.
   *
   * Only each insight's `content` is hashed when present, so bookkeeping fields
   * that differ between a fresh and a cached insight (`cached`, `latency`,
   * `tokens`) do not change the key.
   */
  private computeQueryHash(query: string, cognitiveResults: any): string {
    const stable: Record<string, unknown> = {};
    for (const key of Object.keys(cognitiveResults ?? {}).sort()) {
      const insight = cognitiveResults[key];
      stable[key] = typeof insight?.content === 'string' ? insight.content : insight;
    }

    return createHash('sha256')
      .update(query)
      .update('\u0000') // separator so query/insight boundaries cannot blur
      .update(JSON.stringify(stable))
      .digest('hex')
      .slice(0, 32);
  }

  /**
   * Generate the final response from all cognitive insights, using the
   * final-response cache when an identical query/insight pair was seen before.
   *
   * @throws If the orchestrator agent is not registered or the Redis client rejects.
   */
  private async generateFinalResponse(query: string, cognitiveResults: any): Promise<{
    response: string;
    queryHash: string;
    fromCache: boolean;
    cacheLevel?: string;
    predictions?: string[];
  }> {
    // Create hash for caching
    const queryHash = this.computeQueryHash(query, cognitiveResults);
    const cacheKey = `final_response:${queryHash}`;

    // Check cache first
    // NOTE(review): read has no namespace, write below uses 'responses'.
    // Confirm against AlpheRedisClient that both address the same key.
    const cachedResponse = await this.redisClient.get(cacheKey);
    if (cachedResponse.success && cachedResponse.data) {
      return {
        response: cachedResponse.data.response,
        queryHash,
        fromCache: true,
        cacheLevel: cachedResponse.fromLevel,
        predictions: cachedResponse.data.predictions
      };
    }

    // Generate response using orchestrator
    const orchestrator = this.agents.get('orchestrator_agent');
    if (!orchestrator) {
      throw new Error('Orchestrator agent not available');
    }

    const finalTask: CognitiveTask = {
      id: `orchestrator_${Date.now()}`,
      type: 'reasoning',
      input: {
        query,
        cognitiveResults,
        instruction: 'Synthesize all cognitive insights into a comprehensive response'
      },
      priority: 1,
      deadline: Date.now() + 1000,
      dependencies: [],
      status: 'pending'
    };

    const orchestratorResult = await this.callCognitiveAgent(orchestrator, finalTask);

    // Generate predictive caching suggestions
    const predictions = this.generatePredictions(query, cognitiveResults);

    const finalResponse = {
      response: orchestratorResult.content || 'Response generated using cognitive pipeline',
      queryHash,
      fromCache: false,
      predictions
    };

    // Cache for future similar queries, but never a canned fallback, which
    // would otherwise be served for 30 minutes after the orchestrator recovers.
    if (!orchestratorResult.fallback) {
      await this.redisClient.set(cacheKey, finalResponse, {
        ttl: 1800, // 30 minutes
        namespace: 'responses',
        tags: ['final', 'cognitive', 'orchestrated']
      });
    }

    return finalResponse;
  }

  /**
   * Canned content used when an agent call fails, so the pipeline stays
   * responsive. Roles without a structured fallback get `'Processing...'`.
   */
  private getFallbackResponse(role: string, input: any): string {
    const { query } = input;

    switch (role) {
      case 'perception':
        return JSON.stringify({
          intent: 'information_request',
          entities: [],
          urgency: 5,
          domain: 'general',
          expectedResponseType: 'explanation'
        });
      case 'context':
        return JSON.stringify({
          rewrittenQuery: query,
          missingContext: [],
          followUpQuestions: [],
          keyTerms: [],
          processingStrategy: 'direct'
        });
      case 'planning':
        return JSON.stringify({
          steps: ['Analyze query', 'Generate response'],
          requiredResources: [],
          challenges: [],
          successCriteria: ['Helpful response'],
          fallbacks: ['Use general knowledge']
        });
      default:
        return 'Processing...';
    }
  }

  /**
   * Parse `text` as a JSON object. If the whole string is not JSON, retry on
   * the span from the first `{` to the last `}` (models often wrap JSON in prose).
   */
  private static parseJsonObject(text: string): Record<string, unknown> | undefined {
    const candidates = [text];
    const open = text.indexOf('{');
    const close = text.lastIndexOf('}');
    if (open !== -1 && close > open) {
      candidates.push(text.slice(open, close + 1));
    }

    for (const candidate of candidates) {
      try {
        const parsed: unknown = JSON.parse(candidate);
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          return parsed as Record<string, unknown>;
        }
      } catch {
        // Not JSON; try the next candidate.
      }
    }
    return undefined;
  }

  /**
   * Flatten an agent result into its structured fields. Agent results carry
   * the model's JSON answer as a string in `content`; fields already present
   * directly on the result take precedence over parsed ones.
   */
  private static readInsight(raw: any): any {
    if (raw === null || raw === undefined || typeof raw !== 'object') return undefined;
    if (typeof raw.content !== 'string') return raw;
    const parsed = CognitiveOrchestrator.parseJsonObject(raw.content);
    return parsed ? { ...parsed, ...raw } : raw;
  }

  /**
   * Suggest up to five likely follow-up queries for proactive caching, based
   * on the perception insight's `entities` and `domain`.
   */
  private generatePredictions(query: string, results: any): string[] {
    const predictions: string[] = [];
    const perception = CognitiveOrchestrator.readInsight(results?.perception);

    // Extract entities and topics for related queries
    if (Array.isArray(perception?.entities)) {
      for (const entity of perception.entities) {
        if (typeof entity !== 'string') continue;
        predictions.push(`What is ${entity}?`, `Tell me more about ${entity}`);
      }
    }

    // Add domain-specific predictions
    if (perception?.domain === 'tech') {
      predictions.push('How does this work?', 'What are the alternatives?', 'Is this secure?');
    }

    return predictions.slice(0, 5); // Limit to top 5 predictions
  }

  /**
   * Start the two background loops: a 30-second agent keep-warm ping and a
   * one-second queue drain. Calling it again while running is a no-op.
   * Call `shutdown` to stop both loops.
   */
  async startBackgroundProcessing(): Promise<void> {
    if (this.isProcessing) return;

    this.isProcessing = true;
    console.log('🧠 Starting cognitive background processing...');

    // Keep agents warm by sending health checks
    const healthTimer = setInterval(async () => {
      for (const agent of this.agents.values()) {
        try {
          await fetch(agent.endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              model: agent.model,
              prompt: 'ping',
              stream: false,
              // NOTE(review): the request format looks like Ollama's
              // /api/generate, where num_predict (not max_tokens) caps the
              // output; both are sent so the ping stays cheap either way. Confirm.
              options: { max_tokens: 1, num_predict: 1 }
            }),
            signal: AbortSignal.timeout(1000)
          });
        } catch (error) {
          console.warn(`⚠️ Agent ${agent.name} health check failed`);
        }
      }
    }, 30000); // Every 30 seconds

    // Process queued tasks
    const queueTimer = setInterval(() => {
      // Skip the tick if the previous pass is still running, so passes cannot
      // stack up and exceed each agent's per-pass concurrency limit.
      if (this.drainingQueues) return;
      this.drainingQueues = true;
      this.processQueuedTasks()
        .catch(error => {
          console.error('❌ Queued task processing failed:', error);
        })
        .finally(() => {
          this.drainingQueues = false;
        });
    }, 1000); // Every second

    this.backgroundTimers.push(healthTimer, queueTimer);

    console.log('✅ Cognitive background processing started');
  }

  /**
   * One drain pass: for each agent in turn, take up to `maxConcurrency` tasks
   * off its queue, run them together and wait for them to settle.
   */
  private async processQueuedTasks(): Promise<void> {
    for (const [agentName, queue] of this.agentQueues.entries()) {
      if (queue.length === 0) continue;

      const agent = this.agents.get(agentName);
      if (!agent) continue;

      // Process tasks within concurrency limits
      const tasksToProcess = queue.splice(0, agent.maxConcurrency);

      const promises = tasksToProcess.map(async (task) => {
        try {
          await this.executeTask(task);
        } catch (error) {
          console.error(`❌ Queued task ${task.id} failed:`, error);
        }
      });

      await Promise.allSettled(promises);
    }
  }

  /**
   * Snapshot of the orchestrator: per-agent queue state plus the number of
   * in-flight tasks. `totalProcessed`, `avgLatency` and `cacheHitRate` are
   * placeholders that are always 0; `systemHealth` is always `'healthy'`.
   */
  async getSystemStatus(): Promise<any> {
    const agentStatus: Record<string, {
      model: string;
      role: CognitiveAgent['role'];
      status: 'busy' | 'idle';
      queueLength: number;
      maxConcurrency: number;
      avgLatency: number;
    }> = {};

    for (const [name, agent] of this.agents.entries()) {
      const queueLength = this.agentQueues.get(name)?.length ?? 0;
      agentStatus[name] = {
        model: agent.model,
        role: agent.role,
        status: queueLength > 0 ? 'busy' : 'idle',
        queueLength,
        maxConcurrency: agent.maxConcurrency,
        avgLatency: agent.averageLatency
      };
    }

    return {
      agents: agentStatus,
      activeTasks: this.activeTasks.size,
      totalProcessed: 0,
      avgLatency: 0,
      cacheHitRate: 0,
      systemHealth: 'healthy'
    };
  }

  /**
   * Stop the background loops and drop all in-memory task state.
   * In-flight agent calls are not cancelled.
   */
  async shutdown(): Promise<void> {
    console.log('🔌 Shutting down cognitive orchestrator...');

    // Stop the intervals; otherwise they keep pinging agents and keep the
    // process alive after shutdown.
    for (const timer of this.backgroundTimers) {
      clearInterval(timer);
    }
    this.backgroundTimers = [];

    this.isProcessing = false;
    this.activeTasks.clear();
    this.agentQueues.clear();
    console.log('👋 Cognitive orchestrator shutdown complete');
  }
}