/*
 * alphe-redis-mcp-server
 * Version: (not specified)
 * The most comprehensive Redis MCP Server for Alphe.AI - optimized for
 * sub-5-second response times with multi-layer caching.
 * File listing metadata: 693 lines (601 loc), 20.6 kB, text/typescript
 */
import { createHash } from 'node:crypto';

import { AlpheRedisClient } from '../core/redis-client.js';
// Cognitive Agent Configuration
/**
 * Static description of one LLM-backed agent the orchestrator can call.
 */
export interface CognitiveAgent {
  /** Unique agent name; used as the key of the orchestrator's agent and queue maps. */
  name: string;

  /** Model identifier sent as the `model` field of the generate request. */
  model: string;

  /** Stage of the cognitive pipeline this agent serves. */
  role:
    | 'perception'
    | 'context'
    | 'planning'
    | 'reasoning'
    | 'reflection'
    | 'orchestrator';

  /** URL the prompt is POSTed to. */
  endpoint: string;

  /** Upper bound on how many queued tasks are taken for this agent per queue pass. */
  maxConcurrency: number;

  /** Typical response latency in milliseconds (informational). */
  averageLatency: number;

  /** Cost per generated token; `0` marks a free model. */
  costPerToken: number;
}
/**
 * One unit of work in the cognitive pipeline, together with its
 * scheduling metadata and (once run) its outcome.
 */
export interface CognitiveTask {
  /** Unique task identifier; other tasks refer to it in `dependencies`. */
  id: string;

  /** Kind of cognitive work requested; decides which agent role handles it. */
  type:
    | 'perception'
    | 'context_engineering'
    | 'planning'
    | 'reasoning'
    | 'reflection';

  /** Payload handed to the agent (the orchestrator passes `{ query, context }`). */
  input: any;

  /** Scheduling priority (the orchestrator uses 1 for independent tasks, 2 for dependent ones). */
  priority: number;

  /** Deadline as an epoch timestamp in milliseconds. */
  deadline: number;

  /** Ids of tasks that must be `completed` before this one may run. */
  dependencies: string[];

  /** Name of the agent chosen to run the task, once assigned. */
  assignedAgent?: string;

  /** Lifecycle state of the task. */
  status: 'pending' | 'processing' | 'completed' | 'failed';

  /** Agent output, present after a successful run. */
  result?: any;

  /** Error message, present after a failed run. */
  error?: string;

  /** Epoch milliseconds at which execution started. */
  startTime?: number;

  /** Epoch milliseconds at which execution finished (successfully or not). */
  endTime?: number;
}
/**
 * Runs a user query through a pipeline of LLM agents (perception, context
 * engineering, planning, reasoning) and asks an orchestrator agent to
 * synthesize the final answer. Intermediate and final results are cached
 * through the supplied Redis client.
 */
export class CognitiveOrchestrator {
  /** How long each dependency-free task may run before dependents stop waiting for it (ms). */
  private static readonly INDEPENDENT_TASK_TIMEOUT_MS = 1000;

  /** Task type -> agent role. Task types missing from this map are served by the orchestrator role. */
  private static readonly ROLE_FOR_TASK_TYPE: ReadonlyMap<string, CognitiveAgent['role']> =
    new Map<string, CognitiveAgent['role']>([
      ['perception', 'perception'],
      ['context_engineering', 'context'],
      ['planning', 'planning'],
      ['reasoning', 'reasoning'],
      ['reflection', 'reflection']
    ]);

  private redisClient: AlpheRedisClient;
  private agents: Map<string, CognitiveAgent>;
  private activeTasks: Map<string, CognitiveTask>;
  private agentQueues: Map<string, CognitiveTask[]>;
  private isProcessing: boolean = false;
  /** Handles of the background intervals, kept so `shutdown` can stop them. */
  private backgroundTimers: Array<ReturnType<typeof setInterval>> = [];

  // Free model configuration matching your setup
  private readonly FREE_AGENTS: CognitiveAgent[] = [
    {
      name: 'perception_agent',
      model: 'gemma2-9b',
      role: 'perception',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 3,
      averageLatency: 200, // ms
      costPerToken: 0 // free
    },
    {
      name: 'context_engineer',
      model: 'phi-3-mini',
      role: 'context',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 4,
      averageLatency: 150,
      costPerToken: 0
    },
    {
      name: 'planning_agent',
      model: 'qwq-32b',
      role: 'planning',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 2,
      averageLatency: 500,
      costPerToken: 0
    },
    {
      name: 'reasoning_agent',
      model: 'deepseek-r1',
      role: 'reasoning',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 2,
      averageLatency: 400,
      costPerToken: 0
    },
    {
      name: 'reflection_agent',
      model: 'llama-3.3-70b',
      role: 'reflection',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 1,
      averageLatency: 800,
      costPerToken: 0
    },
    {
      name: 'orchestrator_agent',
      model: 'mixtral-8x7b',
      role: 'orchestrator',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 2,
      averageLatency: 300,
      costPerToken: 0
    }
  ];

  /**
   * @param redisClient Client used for task bookkeeping and result caching.
   */
  constructor(redisClient: AlpheRedisClient) {
    this.redisClient = redisClient;
    this.agents = new Map();
    this.activeTasks = new Map();
    this.agentQueues = new Map();

    // Register every configured agent together with an empty work queue.
    for (const agent of this.FREE_AGENTS) {
      this.agents.set(agent.name, agent);
      this.agentQueues.set(agent.name, []);
    }
  }

  /**
   * Process a user query through the cognitive pipeline.
   *
   * Perception and context engineering run first; planning depends on
   * perception and reasoning depends on context engineering. The
   * orchestrator agent then produces the final response.
   *
   * @param query   The user's query text.
   * @param context Optional caller-supplied context, forwarded to every agent prompt.
   * @returns The final response plus per-stage insights, total latency (ms) and caching details.
   * @throws If the Redis client rejects or the orchestrator agent is not registered.
   */
  async processQuery(query: string, context?: any): Promise<{
    response: string;
    cognitiveInsights: any;
    latency: number;
    cachingStrategy: any;
  }> {
    const startTime = performance.now();
    // The random suffix keeps ids unique when two queries start in the same millisecond.
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
    console.log('🧠 Starting cognitive processing pipeline...');

    const createdAt = Date.now();
    const tasks: CognitiveTask[] = [
      {
        id: `perception_${sessionId}`,
        type: 'perception',
        input: { query, context },
        priority: 1,
        deadline: createdAt + 500, // 500ms deadline
        dependencies: [],
        status: 'pending'
      },
      {
        id: `context_${sessionId}`,
        type: 'context_engineering',
        input: { query, context },
        priority: 1,
        deadline: createdAt + 300, // 300ms deadline
        dependencies: [],
        status: 'pending'
      },
      {
        id: `planning_${sessionId}`,
        type: 'planning',
        input: { query, context },
        priority: 2,
        deadline: createdAt + 800,
        dependencies: [`perception_${sessionId}`],
        status: 'pending'
      },
      {
        id: `reasoning_${sessionId}`,
        type: 'reasoning',
        input: { query, context },
        priority: 2,
        deadline: createdAt + 600,
        dependencies: [`context_${sessionId}`],
        status: 'pending'
      }
    ];

    try {
      // Register locally first: dependency checks look tasks up in activeTasks.
      for (const task of tasks) {
        this.activeTasks.set(task.id, task);
      }

      // Store tasks in Redis for distributed processing (writes are independent, so run them together).
      await Promise.all(
        tasks.map(task =>
          this.redisClient.set(`cognitive_task:${task.id}`, task, {
            ttl: 60,
            namespace: 'cognitive'
          })
        )
      );

      const results = await this.executeTasksInParallel(tasks);

      // Generate final response using orchestrator
      const finalResponse = await this.generateFinalResponse(query, results);
      const totalLatency = performance.now() - startTime;
      console.log(`✅ Cognitive processing completed in ${totalLatency.toFixed(2)}ms`);

      return {
        response: finalResponse.response,
        cognitiveInsights: {
          perceptionResult: results.perception,
          contextEngineering: results.context_engineering,
          planningResult: results.planning,
          reasoningResult: results.reasoning,
          processingLatency: totalLatency,
          agentsUsed: Object.keys(results),
          cachingUsed: finalResponse.fromCache
        },
        latency: totalLatency,
        cachingStrategy: {
          queryHash: finalResponse.queryHash,
          cacheHit: finalResponse.fromCache,
          cacheLevel: finalResponse.cacheLevel,
          nextPredict: finalResponse.predictions
        }
      };
    } finally {
      // Without this the map would keep every task of every query ever processed.
      for (const task of tasks) {
        this.activeTasks.delete(task.id);
      }
    }
  }

  /**
   * Execute cognitive tasks in two waves: tasks without dependencies first
   * (each bounded by a timeout), then tasks whose dependencies completed.
   *
   * @param tasks Tasks to run; they must already be registered in `activeTasks`.
   * @returns Object keyed by task type holding each task's result (or a
   *          `{ fallback: true }` marker when dependencies were not met).
   *          Failed or timed-out tasks have no entry.
   */
  private async executeTasksInParallel(tasks: CognitiveTask[]): Promise<any> {
    const results: any = {};
    const startTime = Date.now();

    // Separate tasks by dependencies
    const independentTasks = tasks.filter(t => t.dependencies.length === 0);
    const dependentTasks = tasks.filter(t => t.dependencies.length > 0);

    console.log(`🚀 Starting ${independentTasks.length} independent cognitive tasks...`);
    // Each task handles its own timeout/failure, so one bad task cannot cut
    // short the wait for its siblings.
    await Promise.all(
      independentTasks.map(task => this.runIndependentTask(task, results))
    );

    console.log(`⚡ Independent tasks completed, starting ${dependentTasks.length} dependent tasks...`);
    await Promise.all(
      dependentTasks.map(task => this.runDependentTask(task, results))
    );

    const totalLatency = Date.now() - startTime;
    console.log(`🎯 All cognitive tasks completed in ${totalLatency}ms`);
    return results;
  }

  /**
   * Run one dependency-free task, giving up waiting after
   * `INDEPENDENT_TASK_TIMEOUT_MS`. Never rejects: failures and timeouts are logged.
   * A task that finishes after the timeout still records its result when it completes.
   */
  private async runIndependentTask(task: CognitiveTask, results: any): Promise<void> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(
        () => reject(new Error('Task timeout')),
        CognitiveOrchestrator.INDEPENDENT_TASK_TIMEOUT_MS
      );
    });
    const run = (async () => {
      results[task.type] = await this.executeTask(task);
    })();

    try {
      await Promise.race([run, timeout]);
    } catch (error) {
      const reason = error instanceof Error ? error.message : 'Unknown error';
      console.warn(`⚠️ Independent task ${task.id} did not complete (${reason}), continuing with available results`);
    } finally {
      // Do not leave a pending timer behind once the race is decided.
      clearTimeout(timer);
    }
  }

  /**
   * Run one task that has dependencies. If any dependency is not `completed`
   * a fallback marker is stored instead. Never rejects.
   */
  private async runDependentTask(task: CognitiveTask, results: any): Promise<void> {
    const dependenciesMet = task.dependencies.every(
      depId => this.activeTasks.get(depId)?.status === 'completed'
    );

    if (!dependenciesMet) {
      console.warn(`⏳ Task ${task.id} dependencies not met, using fallback`);
      results[task.type] = { fallback: true, reason: 'dependencies_not_met' };
      // The task will never run, so do not leave it looking pending.
      task.status = 'failed';
      task.error = 'dependencies_not_met';
      return;
    }

    try {
      results[task.type] = await this.executeTask(task);
    } catch (error) {
      console.error(`❌ Task ${task.id} failed:`, error);
      task.status = 'failed';
      task.error = error instanceof Error ? error.message : 'Unknown error';
    }
  }

  /**
   * Execute an individual cognitive task: pick an agent, serve from cache when
   * possible, otherwise call the agent and cache its answer.
   *
   * Updates `assignedAgent`, `status`, `startTime`, `endTime`, `result` and
   * `error` on the task as it progresses.
   *
   * @returns The agent (or cached) result object.
   * @throws If no agent serves the task type, or if the cache/agent call rejects.
   */
  private async executeTask(task: CognitiveTask): Promise<any> {
    const agent = this.getOptimalAgent(task.type);
    if (!agent) {
      throw new Error(`No agent available for task type: ${task.type}`);
    }

    const startedAt = Date.now();
    task.assignedAgent = agent.name;
    task.status = 'processing';
    task.startTime = startedAt;
    console.log(`🤖 ${agent.name} (${agent.model}) processing ${task.type} task...`);

    try {
      // Check cache first
      // NOTE(review): the key is read without a namespace but written with
      // namespace 'cognitive' — confirm the client resolves both to the same key.
      const cacheKey = `cognitive:${task.type}:${JSON.stringify(task.input)}`;
      const cachedResult = await this.redisClient.get(cacheKey);

      if (cachedResult.success && cachedResult.data) {
        console.log(`⚡ Cache hit for ${task.type} task`);
        const cachedOutcome = {
          ...cachedResult.data,
          cached: true,
          latency: 5 // Near-zero latency for cached results
        };
        task.endTime = Date.now();
        task.result = cachedOutcome;
        task.status = 'completed';
        return cachedOutcome;
      }

      // Execute the cognitive task
      const result = await this.callCognitiveAgent(agent, task);

      // Cache real answers only: a fallback produced during an agent outage
      // would otherwise be replayed for the full TTL.
      if (!result.fallback) {
        await this.redisClient.set(cacheKey, result, {
          ttl: 300, // 5 minutes cache
          namespace: 'cognitive',
          tags: [task.type, agent.model]
        });
      }

      task.endTime = Date.now();
      task.result = result;
      task.status = 'completed';
      console.log(`✅ ${agent.name} completed ${task.type} in ${task.endTime - startedAt}ms`);
      return result;
    } catch (error) {
      task.status = 'failed';
      task.error = error instanceof Error ? error.message : 'Unknown error';
      task.endTime = Date.now();
      throw error;
    }
  }

  /**
   * POST a role-specific prompt to the agent's endpoint and normalise the reply.
   *
   * Never rejects: on any failure (network error, timeout, non-2xx status) a
   * result with `fallback: true` and canned content is returned instead.
   *
   * @returns `{ content, model, tokens, latency, cached }`, plus
   *          `fallback`/`error` when the call failed.
   */
  private async callCognitiveAgent(agent: CognitiveAgent, task: CognitiveTask): Promise<any> {
    const prompt = this.generateOptimizedPrompt(agent.role, task.input);

    try {
      const response = await fetch(agent.endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: agent.model,
          prompt: prompt,
          stream: false,
          options: {
            temperature: agent.role === 'reasoning' ? 0.1 : 0.3,
            top_p: 0.9,
            // NOTE(review): for an Ollama-style endpoint the effective cap is
            // presumably `num_predict`; `max_tokens` is kept for compatibility.
            max_tokens: agent.role === 'reflection' ? 1000 : 500,
            num_predict: agent.role === 'context' ? 200 : 400
          }
        }),
        signal: AbortSignal.timeout(2000) // 2 second timeout
      });

      if (!response.ok) {
        throw new Error(`Agent ${agent.name} returned ${response.status}`);
      }

      const data = await response.json();

      return {
        content: data.response || '',
        model: agent.model,
        tokens: data.eval_count || 0,
        latency: data.total_duration ? Math.round(data.total_duration / 1000000) : 0, // ns to ms
        cached: false
      };
    } catch (error) {
      console.error(`❌ Agent ${agent.name} failed:`, error);

      // Return fallback response to keep system working
      return {
        content: this.getFallbackResponse(agent.role, task.input),
        model: agent.model,
        tokens: 0,
        latency: 0,
        cached: false,
        fallback: true,
        error: error instanceof Error ? error.message : 'Unknown error'
      };
    }
  }

  /**
   * Build the prompt for an agent role.
   *
   * @param role  Agent role; unknown roles get a generic prompt.
   * @param input Task input; `query` and `context` are read for every role, and
   *              `cognitiveResults` / `instruction` for the orchestrator role.
   */
  private generateOptimizedPrompt(role: string, input: any): string {
    const { query, context } = input;

    switch (role) {
      case 'perception':
        return `Analyze this user query and extract key information:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Extract:
1. Intent (what user wants)
2. Entities (important objects/names)
3. Urgency (1-10)
4. Domain (tech, general, creative, etc.)
5. Expected response type
Respond in JSON format only.`;
      case 'context':
        return `Optimize this query for better AI processing:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Provide:
1. Rewritten query (clearer, more specific)
2. Missing context needed
3. Suggested follow-up questions
4. Key terms to emphasize
5. Processing strategy
Respond in JSON format only.`;
      case 'planning':
        return `Create an execution plan for this query:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Plan:
1. Step-by-step approach
2. Required resources/tools
3. Expected challenges
4. Success criteria
5. Fallback options
Respond in JSON format only.`;
      case 'reasoning':
        return `Apply logical reasoning to this query:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Reasoning:
1. Analysis of the problem
2. Logical deductions
3. Evidence/assumptions
4. Confidence level (1-10)
5. Alternative perspectives
Respond in JSON format only.`;
      case 'reflection':
        return `Reflect on and improve this AI interaction:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Reflection:
1. Response quality assessment
2. Potential improvements
3. User satisfaction prediction
4. Learning opportunities
5. Optimization suggestions
Respond in JSON format only.`;
      case 'orchestrator': {
        const { cognitiveResults, instruction } = input;
        // Plain orchestrator calls (no synthesis payload) keep the generic prompt.
        if (cognitiveResults === undefined && instruction === undefined) {
          return `Process this query: "${query}" with context: ${JSON.stringify(context || {})}`;
        }
        // The synthesis step must actually see what the other agents produced.
        return `${instruction || 'Synthesize all cognitive insights into a comprehensive response'}
Query: "${query}"
Cognitive insights: ${JSON.stringify(cognitiveResults || {})}
Respond to the user directly.`;
      }
      default:
        return `Process this query: "${query}" with context: ${JSON.stringify(context || {})}`;
    }
  }

  /**
   * Pick the agent that serves a task type, preferring the one with the
   * shortest queue (first registered wins ties).
   *
   * @returns The chosen agent, or `null` when no agent has the required role.
   */
  private getOptimalAgent(taskType: string): CognitiveAgent | null {
    const wantedRole =
      CognitiveOrchestrator.ROLE_FOR_TASK_TYPE.get(taskType) ?? 'orchestrator';

    let best: CognitiveAgent | null = null;
    let bestLoad = Number.POSITIVE_INFINITY;
    for (const agent of this.agents.values()) {
      if (agent.role !== wantedRole) continue;
      const load = this.agentQueues.get(agent.name)?.length ?? 0;
      if (load < bestLoad) {
        best = agent;
        bestLoad = load;
      }
    }
    return best;
  }

  /**
   * Produce the final response from all cognitive results, using the cache
   * when an identical (query, results) pair was answered before.
   *
   * @throws If the agent named `orchestrator_agent` is not registered.
   */
  private async generateFinalResponse(query: string, cognitiveResults: any): Promise<{
    response: string;
    queryHash: string;
    fromCache: boolean;
    cacheLevel?: string;
    predictions?: string[];
  }> {
    // Digest of the whole (query, results) pair. Truncating the raw encoded
    // text instead would only cover the first few bytes of the query.
    const queryHash = createHash('sha256')
      .update(query)
      .update('\u0000')
      .update(JSON.stringify(cognitiveResults) ?? '')
      .digest('hex')
      .slice(0, 32);
    // NOTE(review): read without a namespace, written with namespace
    // 'responses' — confirm the client resolves both to the same key.
    const cacheKey = `final_response:${queryHash}`;

    // Check cache first
    const cachedResponse = await this.redisClient.get(cacheKey);
    if (cachedResponse.success && cachedResponse.data) {
      return {
        response: cachedResponse.data.response,
        queryHash,
        fromCache: true,
        cacheLevel: cachedResponse.fromLevel,
        predictions: cachedResponse.data.predictions
      };
    }

    // Generate response using orchestrator
    const orchestrator = this.agents.get('orchestrator_agent');
    if (!orchestrator) {
      throw new Error('Orchestrator agent not available');
    }

    const finalTask: CognitiveTask = {
      id: `orchestrator_${Date.now()}`,
      type: 'reasoning',
      input: {
        query,
        cognitiveResults,
        instruction: 'Synthesize all cognitive insights into a comprehensive response'
      },
      priority: 1,
      deadline: Date.now() + 1000,
      dependencies: [],
      status: 'pending'
    };
    const orchestratorResult = await this.callCognitiveAgent(orchestrator, finalTask);

    // Generate predictive caching suggestions
    const predictions = this.generatePredictions(query, cognitiveResults);
    const finalResponse = {
      response: orchestratorResult.content || 'Response generated using cognitive pipeline',
      queryHash,
      fromCache: false,
      predictions
    };

    // Cache for future similar queries — but never a placeholder produced
    // because the orchestrator call failed.
    if (!orchestratorResult.fallback) {
      await this.redisClient.set(cacheKey, finalResponse, {
        ttl: 1800, // 30 minutes
        namespace: 'responses',
        tags: ['final', 'cognitive', 'orchestrated']
      });
    }

    return finalResponse;
  }

  /**
   * Canned content used when an agent call fails, so the pipeline still
   * returns something. Perception, context and planning get a JSON string;
   * every other role gets `'Processing...'`.
   */
  private getFallbackResponse(role: string, input: any): string {
    const { query } = input;

    switch (role) {
      case 'perception':
        return JSON.stringify({
          intent: 'information_request',
          entities: [],
          urgency: 5,
          domain: 'general',
          expectedResponseType: 'explanation'
        });
      case 'context':
        return JSON.stringify({
          rewrittenQuery: query,
          missingContext: [],
          followUpQuestions: [],
          keyTerms: [],
          processingStrategy: 'direct'
        });
      case 'planning':
        return JSON.stringify({
          steps: ['Analyze query', 'Generate response'],
          requiredResources: [],
          challenges: [],
          successCriteria: ['Helpful response'],
          fallbacks: ['Use general knowledge']
        });
      default:
        return 'Processing...';
    }
  }

  /**
   * Suggest up to five likely follow-up queries for proactive caching, based
   * on the entities and domain found by the perception stage.
   *
   * @param query   The original query (currently unused).
   * @param results Pipeline results keyed by task type.
   */
  private generatePredictions(query: string, results: any): string[] {
    const predictions: string[] = [];
    const perception = this.extractPerceptionInsights(results?.perception);

    // Extract entities and topics for related queries
    if (Array.isArray(perception.entities)) {
      for (const entity of perception.entities) {
        if (typeof entity !== 'string' && typeof entity !== 'number') continue;
        predictions.push(`What is ${entity}?`);
        predictions.push(`Tell me more about ${entity}`);
      }
    }

    // Add domain-specific predictions
    if (perception.domain === 'tech') {
      predictions.push('How does this work?');
      predictions.push('What are the alternatives?');
      predictions.push('Is this secure?');
    }

    return predictions.slice(0, 5); // Limit to top 5 predictions
  }

  /**
   * Return the perception insights as an object. Accepts either an object
   * that already exposes `entities`/`domain`, or an agent result whose
   * `content` string holds the JSON (optionally surrounded by other text,
   * e.g. a Markdown code fence). Anything unparseable yields `{}`.
   */
  private extractPerceptionInsights(perception: any): Record<string, unknown> {
    if (perception === null || typeof perception !== 'object') return {};
    if (perception.entities !== undefined || perception.domain !== undefined) {
      return perception;
    }

    const content: unknown = perception.content;
    if (typeof content !== 'string') return {};

    // Take the outermost {...} span so wrapping text around the JSON is tolerated.
    const open = content.indexOf('{');
    const close = content.lastIndexOf('}');
    if (open === -1 || close <= open) return {};

    try {
      const parsed: unknown = JSON.parse(content.slice(open, close + 1));
      return parsed !== null && typeof parsed === 'object'
        ? (parsed as Record<string, unknown>)
        : {};
    } catch {
      return {};
    }
  }

  /**
   * Start the background loops: an agent keep-warm ping every 30 seconds and
   * a queue-processing pass every second. Calling it again while running is a no-op.
   */
  async startBackgroundProcessing(): Promise<void> {
    if (this.isProcessing) return;

    this.isProcessing = true;
    console.log('🧠 Starting cognitive background processing...');

    // Keep agents warm by sending health checks
    const healthTimer = setInterval(() => {
      void this.pingAgents();
    }, 30000); // Every 30 seconds

    // Process queued tasks
    const queueTimer = setInterval(() => {
      void this.processQueuedTasks();
    }, 1000); // Every second

    // Remember the handles so shutdown() can actually stop the loops.
    this.backgroundTimers.push(healthTimer, queueTimer);
    console.log('✅ Cognitive background processing started');
  }

  /** Send a minimal generate request to every agent, one after another. Never rejects. */
  private async pingAgents(): Promise<void> {
    for (const agent of this.agents.values()) {
      try {
        await fetch(agent.endpoint, {
          method: 'POST',
          headers: { 'Content-Type': 'application/json' },
          body: JSON.stringify({
            model: agent.model,
            prompt: 'ping',
            stream: false,
            // `num_predict` is the generation cap used elsewhere in this class;
            // `max_tokens` is kept for compatibility.
            options: { max_tokens: 1, num_predict: 1 }
          }),
          signal: AbortSignal.timeout(1000)
        });
      } catch (error) {
        console.warn(`⚠️ Agent ${agent.name} health check failed`);
      }
    }
  }

  /**
   * Drain each agent's queue by at most `maxConcurrency` tasks and run that
   * batch concurrently; agents are handled one after another. Task failures
   * are logged, not thrown. (No code in this class adds tasks to the queues.)
   */
  private async processQueuedTasks(): Promise<void> {
    for (const [agentName, queue] of this.agentQueues.entries()) {
      if (queue.length === 0) continue;

      const agent = this.agents.get(agentName);
      if (!agent) continue;

      // Process tasks within concurrency limits
      const batch = queue.splice(0, agent.maxConcurrency);
      await Promise.allSettled(
        batch.map(async (task) => {
          try {
            await this.executeTask(task);
          } catch (error) {
            console.error(`❌ Queued task ${task.id} failed:`, error);
          }
        })
      );
    }
  }

  /**
   * Snapshot of the cognitive system: per-agent queue state plus aggregate
   * counters (`totalProcessed`, `avgLatency` and `cacheHitRate` are fixed at 0
   * and `systemHealth` at `'healthy'`; they are not measured here).
   */
  async getSystemStatus(): Promise<any> {
    const agentStatus: Record<string, unknown> = {};

    for (const [name, agent] of this.agents.entries()) {
      const queueLength = this.agentQueues.get(name)?.length ?? 0;
      agentStatus[name] = {
        model: agent.model,
        role: agent.role,
        status: queueLength > 0 ? 'busy' : 'idle',
        queueLength,
        maxConcurrency: agent.maxConcurrency,
        avgLatency: agent.averageLatency
      };
    }

    return {
      agents: agentStatus,
      activeTasks: this.activeTasks.size,
      totalProcessed: 0,
      avgLatency: 0,
      cacheHitRate: 0,
      systemHealth: 'healthy'
    };
  }

  /**
   * Stop the background loops and drop all in-memory task state.
   */
  async shutdown(): Promise<void> {
    console.log('🔌 Shutting down cognitive orchestrator...');

    // Stop the intervals; otherwise they keep firing (and keep the process alive).
    for (const timer of this.backgroundTimers) {
      clearInterval(timer);
    }
    this.backgroundTimers = [];

    this.isProcessing = false;
    this.activeTasks.clear();
    this.agentQueues.clear();
    console.log('👋 Cognitive orchestrator shutdown complete');
  }
}