alphe-redis-mcp-server
Version:
The most comprehensive Redis MCP Server for Alphe.AI - Optimized for sub-5 second response times with multi-layer caching
693 lines (601 loc) • 20.6 kB
text/typescript
import { AlpheRedisClient } from '../core/redis-client.js';
// Cognitive Agent Configuration
/**
 * Static description of one model-backed worker in the cognitive pipeline.
 * Instances are registered in the orchestrator's agent map and selected per
 * task by role and current queue load.
 */
export interface CognitiveAgent {
  // Unique identifier; used as the key in the orchestrator's agent/queue maps.
  name: string;
  // Model name passed to the inference endpoint (e.g. an Ollama model tag).
  model: string;
  // Pipeline stage this agent serves; 'orchestrator' synthesizes the final answer.
  role: 'perception' | 'context' | 'planning' | 'reasoning' | 'reflection' | 'orchestrator';
  // HTTP endpoint the agent is invoked through (Ollama-style /api/generate).
  endpoint: string;
  // Maximum number of queued tasks drained for this agent per scheduling pass.
  maxConcurrency: number;
  // Expected response latency in milliseconds (informational, reported in status).
  averageLatency: number;
  // Cost per token; 0 for the locally hosted free models used here.
  costPerToken: number;
}
/**
 * A single unit of work flowing through the cognitive pipeline. Tasks are
 * persisted to Redis for distributed processing and tracked in-memory while
 * active; dependency IDs gate dependent tasks until their prerequisites
 * reach 'completed'.
 */
export interface CognitiveTask {
  // Unique task ID, e.g. `perception_<sessionId>`; also the Redis key suffix.
  id: string;
  // Pipeline stage requested; maps to an agent role when scheduling.
  type: 'perception' | 'context_engineering' | 'planning' | 'reasoning' | 'reflection';
  // Stage-specific payload (typically { query, context }).
  input: any;
  // Lower number = higher priority (informational in the current scheduler).
  priority: number;
  // Epoch-ms soft deadline for completion.
  deadline: number;
  // IDs of tasks that must complete before this one may run.
  dependencies: string[];
  // Name of the agent that picked the task up, set at execution time.
  assignedAgent?: string;
  // Lifecycle state advanced by the orchestrator.
  status: 'pending' | 'processing' | 'completed' | 'failed';
  // Agent output once the task finishes successfully.
  result?: any;
  // Failure message when status === 'failed'.
  error?: string;
  // Epoch-ms timestamps bracketing execution, for latency reporting.
  startTime?: number;
  endTime?: number;
}
/**
 * Coordinates a pipeline of free, locally hosted LLM agents (perception,
 * context engineering, planning, reasoning, reflection, plus a final
 * orchestrator) to answer user queries, with aggressive Redis caching at the
 * per-stage and final-response levels.
 *
 * Lifecycle: construct with a Redis client, optionally call
 * startBackgroundProcessing() to keep agents warm and drain queued tasks,
 * call processQuery() per user query, and shutdown() to stop timers and
 * release in-memory state.
 */
export class CognitiveOrchestrator {
  private redisClient: AlpheRedisClient;
  private agents: Map<string, CognitiveAgent>;
  private activeTasks: Map<string, CognitiveTask>;
  private agentQueues: Map<string, CognitiveTask[]>;
  private isProcessing: boolean = false;
  // Interval handles created by startBackgroundProcessing(); retained so
  // shutdown() can cancel them — previously they leaked and kept the event
  // loop alive forever.
  private backgroundTimers: ReturnType<typeof setInterval>[] = [];

  // Free model configuration matching your setup
  private readonly FREE_AGENTS: CognitiveAgent[] = [
    {
      name: 'perception_agent',
      model: 'gemma2-9b',
      role: 'perception',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 3,
      averageLatency: 200, // ms
      costPerToken: 0 // free
    },
    {
      name: 'context_engineer',
      model: 'phi-3-mini',
      role: 'context',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 4,
      averageLatency: 150,
      costPerToken: 0
    },
    {
      name: 'planning_agent',
      model: 'qwq-32b',
      role: 'planning',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 2,
      averageLatency: 500,
      costPerToken: 0
    },
    {
      name: 'reasoning_agent',
      model: 'deepseek-r1',
      role: 'reasoning',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 2,
      averageLatency: 400,
      costPerToken: 0
    },
    {
      name: 'reflection_agent',
      model: 'llama-3.3-70b',
      role: 'reflection',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 1,
      averageLatency: 800,
      costPerToken: 0
    },
    {
      name: 'orchestrator_agent',
      model: 'mixtral-8x7b',
      role: 'orchestrator',
      endpoint: 'http://localhost:11434/api/generate',
      maxConcurrency: 2,
      averageLatency: 300,
      costPerToken: 0
    }
  ];

  constructor(redisClient: AlpheRedisClient) {
    this.redisClient = redisClient;
    this.agents = new Map();
    this.activeTasks = new Map();
    this.agentQueues = new Map();
    // Register every free agent and give it an empty work queue.
    this.FREE_AGENTS.forEach(agent => {
      this.agents.set(agent.name, agent);
      this.agentQueues.set(agent.name, []);
    });
  }

  /**
   * Process a user query through the full cognitive pipeline.
   *
   * Fans out perception/context tasks in parallel, then planning/reasoning
   * tasks that depend on them, and finally synthesizes a response via the
   * orchestrator agent (with final-response caching).
   *
   * @param query   Raw user query text.
   * @param context Optional caller-supplied context forwarded to every agent.
   * @returns Final response text plus per-stage insights, total latency, and
   *          caching metadata.
   */
  async processQuery(query: string, context?: any): Promise<{
    response: string;
    cognitiveInsights: any;
    latency: number;
    cachingStrategy: any;
  }> {
    const startTime = performance.now();
    // Random suffix prevents task-ID collisions when two queries arrive
    // within the same millisecond.
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    console.log('🧠 Starting cognitive processing pipeline...');
    // Create cognitive tasks that run in parallel
    const tasks: CognitiveTask[] = [
      {
        id: `perception_${sessionId}`,
        type: 'perception',
        input: { query, context },
        priority: 1,
        deadline: Date.now() + 500, // 500ms deadline
        dependencies: [],
        status: 'pending'
      },
      {
        id: `context_${sessionId}`,
        type: 'context_engineering',
        input: { query, context },
        priority: 1,
        deadline: Date.now() + 300, // 300ms deadline
        dependencies: [],
        status: 'pending'
      },
      {
        id: `planning_${sessionId}`,
        type: 'planning',
        input: { query, context },
        priority: 2,
        deadline: Date.now() + 800,
        dependencies: [`perception_${sessionId}`],
        status: 'pending'
      },
      {
        id: `reasoning_${sessionId}`,
        type: 'reasoning',
        input: { query, context },
        priority: 2,
        deadline: Date.now() + 600,
        dependencies: [`context_${sessionId}`],
        status: 'pending'
      }
    ];
    // Store tasks in Redis for distributed processing
    for (const task of tasks) {
      await this.redisClient.set(`cognitive_task:${task.id}`, task, {
        ttl: 60,
        namespace: 'cognitive'
      });
      this.activeTasks.set(task.id, task);
    }
    // Process all tasks in parallel
    const results = await this.executeTasksInParallel(tasks);
    // Generate final response using orchestrator
    const finalResponse = await this.generateFinalResponse(query, results);
    const totalLatency = performance.now() - startTime;
    console.log(`✅ Cognitive processing completed in ${totalLatency.toFixed(2)}ms`);
    return {
      response: finalResponse.response,
      cognitiveInsights: {
        perceptionResult: results.perception,
        contextEngineering: results.context_engineering,
        planningResult: results.planning,
        reasoningResult: results.reasoning,
        processingLatency: totalLatency,
        agentsUsed: Object.keys(results),
        cachingUsed: finalResponse.fromCache
      },
      latency: totalLatency,
      cachingStrategy: {
        queryHash: finalResponse.queryHash,
        cacheHit: finalResponse.fromCache,
        cacheLevel: finalResponse.cacheLevel,
        nextPredict: finalResponse.predictions
      }
    };
  }

  /**
   * Race a promise against a deadline; the deadline timer is always cleared
   * so no timer lingers after the work settles (the old inline
   * Promise.race/setTimeout left timers pending).
   */
  private withTimeout<T>(work: Promise<T>, ms: number): Promise<T> {
    return new Promise<T>((resolve, reject) => {
      const timer = setTimeout(() => reject(new Error('Task timeout')), ms);
      work.then(
        value => { clearTimeout(timer); resolve(value); },
        error => { clearTimeout(timer); reject(error); }
      );
    });
  }

  // Execute cognitive tasks in parallel with intelligent scheduling:
  // dependency-free tasks run first (each under a 1s timeout), then tasks
  // whose dependencies completed. Returns a map of task.type -> result.
  private async executeTasksInParallel(tasks: CognitiveTask[]): Promise<any> {
    const results: any = {};
    const startTime = Date.now();
    // Separate tasks by dependencies
    const independentTasks = tasks.filter(t => t.dependencies.length === 0);
    const dependentTasks = tasks.filter(t => t.dependencies.length > 0);
    console.log(`🚀 Starting ${independentTasks.length} independent cognitive tasks...`);
    // allSettled (not Promise.all) so one slow or failing task does not
    // abandon the wait for its siblings' results.
    const independentOutcomes = await Promise.allSettled(
      independentTasks.map(async (task) => {
        const result = await this.withTimeout(this.executeTask(task), 1000);
        results[task.type] = result;
        task.status = 'completed';
      })
    );
    if (independentOutcomes.some(outcome => outcome.status === 'rejected')) {
      console.warn('⚠️ Some independent tasks timed out, continuing with available results');
    }
    console.log(`⚡ Independent tasks completed, starting ${dependentTasks.length} dependent tasks...`);
    // Now execute dependent tasks
    const dependentPromises = dependentTasks.map(async (task) => {
      // Check if dependencies are met
      const dependenciesMet = task.dependencies.every(depId => {
        const depTask = this.activeTasks.get(depId);
        return depTask?.status === 'completed';
      });
      if (dependenciesMet) {
        try {
          const result = await this.executeTask(task);
          results[task.type] = result;
          task.status = 'completed';
        } catch (error) {
          console.error(`❌ Task ${task.id} failed:`, error);
          task.status = 'failed';
          task.error = error instanceof Error ? error.message : 'Unknown error';
        }
      } else {
        // Degrade gracefully rather than blocking on a missing prerequisite.
        console.warn(`⏳ Task ${task.id} dependencies not met, using fallback`);
        results[task.type] = { fallback: true, reason: 'dependencies_not_met' };
      }
    });
    await Promise.allSettled(dependentPromises);
    const totalLatency = Date.now() - startTime;
    console.log(`🎯 All cognitive tasks completed in ${totalLatency}ms`);
    return results;
  }

  // Execute an individual cognitive task: pick the least-loaded agent for the
  // task type, serve from cache when possible, otherwise call the agent and
  // cache its result. Marks the task 'completed'/'failed' itself so queued
  // tasks (drained by processQueuedTasks) get a terminal status too.
  private async executeTask(task: CognitiveTask): Promise<any> {
    const agent = this.getOptimalAgent(task.type);
    if (!agent) {
      throw new Error(`No agent available for task type: ${task.type}`);
    }
    task.assignedAgent = agent.name;
    task.status = 'processing';
    task.startTime = Date.now();
    console.log(`🤖 ${agent.name} (${agent.model}) processing ${task.type} task...`);
    try {
      // Check cache first
      const cacheKey = `cognitive:${task.type}:${JSON.stringify(task.input)}`;
      const cachedResult = await this.redisClient.get(cacheKey);
      if (cachedResult.success && cachedResult.data) {
        console.log(`⚡ Cache hit for ${task.type} task`);
        task.status = 'completed';
        return {
          ...cachedResult.data,
          cached: true,
          latency: 5 // Near-zero latency for cached results
        };
      }
      // Execute the cognitive task
      const result = await this.callCognitiveAgent(agent, task);
      // Cache the result for future queries
      await this.redisClient.set(cacheKey, result, {
        ttl: 300, // 5 minutes cache
        namespace: 'cognitive',
        tags: [task.type, agent.model]
      });
      task.endTime = Date.now();
      task.result = result;
      task.status = 'completed';
      console.log(`✅ ${agent.name} completed ${task.type} in ${task.endTime - task.startTime!}ms`);
      return result;
    } catch (error) {
      task.status = 'failed';
      task.error = error instanceof Error ? error.message : 'Unknown error';
      task.endTime = Date.now();
      throw error;
    }
  }

  // Call a cognitive agent over HTTP (Ollama-style generate API) with a
  // role-tuned prompt and sampling options. Never throws: on any failure it
  // returns a fallback payload so the pipeline stays responsive.
  private async callCognitiveAgent(agent: CognitiveAgent, task: CognitiveTask): Promise<any> {
    const prompt = this.generateOptimizedPrompt(agent.role, task.input);
    try {
      const response = await fetch(agent.endpoint, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({
          model: agent.model,
          prompt: prompt,
          stream: false,
          options: {
            // Reasoning wants near-deterministic output; other roles get a
            // little creative headroom.
            temperature: agent.role === 'reasoning' ? 0.1 : 0.3,
            top_p: 0.9,
            max_tokens: agent.role === 'reflection' ? 1000 : 500,
            num_predict: agent.role === 'context' ? 200 : 400
          }
        }),
        signal: AbortSignal.timeout(2000) // 2 second timeout
      });
      if (!response.ok) {
        throw new Error(`Agent ${agent.name} returned ${response.status}`);
      }
      const data = await response.json();
      return {
        content: data.response || '',
        model: agent.model,
        tokens: data.eval_count || 0,
        latency: data.total_duration ? Math.round(data.total_duration / 1000000) : 0, // ns to ms
        cached: false
      };
    } catch (error) {
      console.error(`❌ Agent ${agent.name} failed:`, error);
      // Return fallback response to keep system working
      return {
        content: this.getFallbackResponse(agent.role, task.input),
        model: agent.model,
        tokens: 0,
        latency: 0,
        cached: false,
        fallback: true,
        error: error instanceof Error ? error.message : 'Unknown error'
      };
    }
  }

  // Generate role-specific prompts; every role is asked to answer in JSON so
  // downstream consumers can parse the structured result.
  private generateOptimizedPrompt(role: string, input: any): string {
    const { query, context } = input;
    switch (role) {
      case 'perception':
        return `Analyze this user query and extract key information:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Extract:
1. Intent (what user wants)
2. Entities (important objects/names)
3. Urgency (1-10)
4. Domain (tech, general, creative, etc.)
5. Expected response type
Respond in JSON format only.`;
      case 'context':
        return `Optimize this query for better AI processing:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Provide:
1. Rewritten query (clearer, more specific)
2. Missing context needed
3. Suggested follow-up questions
4. Key terms to emphasize
5. Processing strategy
Respond in JSON format only.`;
      case 'planning':
        return `Create an execution plan for this query:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Plan:
1. Step-by-step approach
2. Required resources/tools
3. Expected challenges
4. Success criteria
5. Fallback options
Respond in JSON format only.`;
      case 'reasoning':
        return `Apply logical reasoning to this query:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Reasoning:
1. Analysis of the problem
2. Logical deductions
3. Evidence/assumptions
4. Confidence level (1-10)
5. Alternative perspectives
Respond in JSON format only.`;
      case 'reflection':
        return `Reflect on and improve this AI interaction:
Query: "${query}"
Context: ${JSON.stringify(context || {})}
Reflection:
1. Response quality assessment
2. Potential improvements
3. User satisfaction prediction
4. Learning opportunities
5. Optimization suggestions
Respond in JSON format only.`;
      default:
        return `Process this query: "${query}" with context: ${JSON.stringify(context || {})}`;
    }
  }

  // Resolve a task type to its serving agent role and return the registered
  // agent of that role with the shortest pending queue; null if none exists.
  private getOptimalAgent(taskType: string): CognitiveAgent | null {
    // Unrecognized task types fall back to the orchestrator role, matching
    // the previous switch's default branch.
    const roleForTask: Record<string, CognitiveAgent['role']> = {
      perception: 'perception',
      context_engineering: 'context',
      planning: 'planning',
      reasoning: 'reasoning',
      reflection: 'reflection'
    };
    const wantedRole = roleForTask[taskType] ?? 'orchestrator';
    const candidates = Array.from(this.agents.values()).filter(agent => agent.role === wantedRole);
    if (candidates.length === 0) return null;
    // Select agent with lowest current load
    return candidates.sort((a, b) => {
      const queueA = this.agentQueues.get(a.name)?.length || 0;
      const queueB = this.agentQueues.get(b.name)?.length || 0;
      return queueA - queueB;
    })[0];
  }

  // Generate the final response by synthesizing all cognitive insights with
  // the orchestrator agent; cached by a hash of query + stage results.
  private async generateFinalResponse(query: string, cognitiveResults: any): Promise<{
    response: string;
    queryHash: string;
    fromCache: boolean;
    cacheLevel?: string;
    predictions?: string[];
  }> {
    // Create hash for caching
    const queryHash = Buffer.from(query + JSON.stringify(cognitiveResults)).toString('base64').slice(0, 32);
    const cacheKey = `final_response:${queryHash}`;
    // Check cache first
    const cachedResponse = await this.redisClient.get(cacheKey);
    if (cachedResponse.success && cachedResponse.data) {
      return {
        response: cachedResponse.data.response,
        queryHash,
        fromCache: true,
        cacheLevel: cachedResponse.fromLevel,
        predictions: cachedResponse.data.predictions
      };
    }
    // Generate response using orchestrator
    const orchestrator = this.agents.get('orchestrator_agent');
    if (!orchestrator) {
      throw new Error('Orchestrator agent not available');
    }
    const finalTask: CognitiveTask = {
      id: `orchestrator_${Date.now()}`,
      type: 'reasoning',
      input: {
        query,
        cognitiveResults,
        instruction: 'Synthesize all cognitive insights into a comprehensive response'
      },
      priority: 1,
      deadline: Date.now() + 1000,
      dependencies: [],
      status: 'pending'
    };
    const orchestratorResult = await this.callCognitiveAgent(orchestrator, finalTask);
    // Generate predictive caching suggestions
    const predictions = this.generatePredictions(query, cognitiveResults);
    const finalResponse = {
      response: orchestratorResult.content || 'Response generated using cognitive pipeline',
      queryHash,
      fromCache: false,
      predictions
    };
    // Cache for future similar queries
    await this.redisClient.set(cacheKey, finalResponse, {
      ttl: 1800, // 30 minutes
      namespace: 'responses',
      tags: ['final', 'cognitive', 'orchestrated']
    });
    return finalResponse;
  }

  // Canned per-role JSON fallbacks so a dead agent never stalls the pipeline.
  private getFallbackResponse(role: string, input: any): string {
    const { query } = input;
    switch (role) {
      case 'perception':
        return JSON.stringify({
          intent: 'information_request',
          entities: [],
          urgency: 5,
          domain: 'general',
          expectedResponseType: 'explanation'
        });
      case 'context':
        return JSON.stringify({
          rewrittenQuery: query,
          missingContext: [],
          followUpQuestions: [],
          keyTerms: [],
          processingStrategy: 'direct'
        });
      case 'planning':
        return JSON.stringify({
          steps: ['Analyze query', 'Generate response'],
          requiredResources: [],
          challenges: [],
          successCriteria: ['Helpful response'],
          fallbacks: ['Use general knowledge']
        });
      default:
        return 'Processing...';
    }
  }

  // Generate up to five likely follow-up queries for proactive cache warming,
  // based on perceived entities and domain.
  private generatePredictions(query: string, results: any): string[] {
    // Explicit string[] — an untyped [] literal is any[] under noImplicitAny.
    const predictions: string[] = [];
    // Extract entities and topics for related queries
    if (results.perception?.entities) {
      results.perception.entities.forEach((entity: string) => {
        predictions.push(`What is ${entity}?`);
        predictions.push(`Tell me more about ${entity}`);
      });
    }
    // Add domain-specific predictions
    if (results.perception?.domain === 'tech') {
      predictions.push('How does this work?');
      predictions.push('What are the alternatives?');
      predictions.push('Is this secure?');
    }
    return predictions.slice(0, 5); // Limit to top 5 predictions
  }

  /**
   * Start background maintenance: a 30s health-check ping to every agent
   * (keeps models warm) and a 1s drain of the per-agent task queues.
   * Idempotent; timer handles are kept so shutdown() can cancel them.
   */
  async startBackgroundProcessing(): Promise<void> {
    if (this.isProcessing) return;
    this.isProcessing = true;
    console.log('🧠 Starting cognitive background processing...');
    // Keep agents warm by sending health checks
    this.backgroundTimers.push(setInterval(async () => {
      for (const agent of this.agents.values()) {
        try {
          await fetch(agent.endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
              model: agent.model,
              prompt: 'ping',
              stream: false,
              options: { max_tokens: 1 }
            }),
            signal: AbortSignal.timeout(1000)
          });
        } catch (error) {
          console.warn(`⚠️ Agent ${agent.name} health check failed`);
        }
      }
    }, 30000)); // Every 30 seconds
    // Process queued tasks
    this.backgroundTimers.push(setInterval(async () => {
      await this.processQueuedTasks();
    }, 1000)); // Every second
    console.log('✅ Cognitive background processing started');
  }

  // Drain each agent's queue, taking at most maxConcurrency tasks per pass;
  // individual task failures are logged and do not stop the drain.
  private async processQueuedTasks(): Promise<void> {
    for (const [agentName, queue] of this.agentQueues.entries()) {
      if (queue.length === 0) continue;
      const agent = this.agents.get(agentName);
      if (!agent) continue;
      // Process tasks within concurrency limits
      const tasksToProcess = queue.splice(0, agent.maxConcurrency);
      const promises = tasksToProcess.map(async (task) => {
        try {
          await this.executeTask(task);
        } catch (error) {
          console.error(`❌ Queued task ${task.id} failed:`, error);
        }
      });
      await Promise.allSettled(promises);
    }
  }

  /**
   * Snapshot of the cognitive system: per-agent model/role/queue state plus
   * aggregate counters. Note: totalProcessed/avgLatency/cacheHitRate are
   * placeholders (always 0) — no aggregation is wired up yet.
   */
  async getSystemStatus(): Promise<any> {
    const agents: Record<string, {
      model: string;
      role: string;
      status: string;
      queueLength: number;
      maxConcurrency: number;
      avgLatency: number;
    }> = {};
    for (const [name, agent] of this.agents.entries()) {
      const queueLength = this.agentQueues.get(name)?.length || 0;
      agents[name] = {
        model: agent.model,
        role: agent.role,
        status: queueLength > 0 ? 'busy' : 'idle',
        queueLength,
        maxConcurrency: agent.maxConcurrency,
        avgLatency: agent.averageLatency
      };
    }
    return {
      agents,
      activeTasks: this.activeTasks.size,
      totalProcessed: 0,
      avgLatency: 0,
      cacheHitRate: 0,
      systemHealth: 'healthy'
    };
  }

  /**
   * Stop background timers and clear in-memory state. Clearing the interval
   * handles is what allows the Node process to exit; previously shutdown()
   * left both intervals running forever.
   */
  async shutdown(): Promise<void> {
    console.log('🔌 Shutting down cognitive orchestrator...');
    this.isProcessing = false;
    this.backgroundTimers.forEach(timer => clearInterval(timer));
    this.backgroundTimers = [];
    this.activeTasks.clear();
    this.agentQueues.clear();
    console.log('👋 Cognitive orchestrator shutdown complete');
  }
}