codecrucible-synth
Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
/**
* Request Execution Manager - Extracted from UnifiedModelClient
* Manages request processing, execution strategies, and fallback logic, following the Living Spiral methodology
*
* Council Perspectives Applied:
* - Performance Engineer: Optimized execution strategies and timeout handling
* - Maintainer: Reliable request processing and error recovery
* - Security Guardian: Safe request validation and execution sandboxing
* - Explorer: Flexible execution modes and strategy adaptation
* - Architect: Clean separation between strategy, execution, and coordination
*/
import { EventEmitter } from 'events';
import { logger } from '../logger.js';
import { getErrorMessage, toError } from '../../utils/error-utils.js';
import { getGlobalEnhancedToolIntegration } from '../tools/enhanced-tool-integration.js';
import { getGlobalToolIntegration } from '../tools/tool-integration.js';
import { requestBatcher } from '../performance/intelligent-request-batcher.js';
import { adaptiveTuner } from '../performance/adaptive-performance-tuner.js';
import { requestTimeoutOptimizer } from '../performance/request-timeout-optimizer.js';
export class RequestExecutionManager extends EventEmitter {
config;
activeRequests = new Map();
requestQueue = [];
processManager;
providerRepository;
isShuttingDown = false;
queueProcessor = null;
constructor(config, processManager, providerRepository) {
super();
this.config = config;
this.processManager = processManager;
this.providerRepository = providerRepository;
// Start event-driven queue processor instead of infinite loop
this.scheduleQueueProcessor();
// Handle shutdown gracefully
process.once('SIGTERM', () => this.shutdown());
process.once('SIGINT', () => this.shutdown());
}
/**
* Main entry point: processes a single request end to end. Batch-eligible
* requests are routed through the intelligent batcher first; everything else
* (and any batching failure) falls through to individual execution with
* provider fallback.
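*
* @example
* // Illustrative usage sketch — the config shape and request fields shown
* // here are assumptions, not a documented contract:
* const manager = new RequestExecutionManager(config, processManager, providerRepository);
* const response = await manager.processRequest(
*   { prompt: 'Summarize this repository', stream: false },
*   { files: [] }
* );
* console.log(response.content, response.metadata?.latency);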
*/
async processRequest(request, context) {
const requestId = this.generateRequestId();
const strategy = this.determineExecutionStrategy(request, context);
const startTime = Date.now();
logger.info(`🚀 Processing request ${requestId} with ${strategy.mode} mode via ${strategy.provider}`);
// Check if request is eligible for batching (non-streaming, non-tool requests)
const isBatchEligible = !request.stream &&
(!request.tools || request.tools.length === 0) &&
strategy.mode !== 'fast'; // Fast mode bypasses batching
if (isBatchEligible) {
logger.debug(`Request ${requestId} eligible for batching`);
try {
// Use intelligent batching for similar requests
// Note: the provider id is passed for both the provider and model arguments
const batchResult = await requestBatcher.batchRequest(request.prompt, strategy.provider, strategy.provider, {
temperature: request.temperature,
maxTokens: request.maxTokens,
mode: strategy.mode,
priority: this.getRequestPriority(request)
}, request.tools);
// Record performance metrics for adaptive tuning
const responseTime = Date.now() - startTime;
const errorRate = 0; // Successful batch response
adaptiveTuner.recordMetrics(responseTime, 1, errorRate);
logger.info(`✅ Request ${requestId} completed via batching in ${responseTime}ms`);
return {
content: batchResult.content,
model: strategy.provider,
provider: strategy.provider,
metadata: {
tokens: batchResult.usage?.totalTokens || 0,
latency: responseTime,
selectedProvider: strategy.provider,
fromBatch: true
}
};
}
catch (error) {
logger.warn(`Batching failed for ${requestId}, falling back to individual processing:`, error);
// Fall through to normal processing
}
}
// Register process with process manager
const processType = this.getProcessType(request);
const priority = this.getRequestPriority(request);
const memoryUsage = this.estimateMemoryUsage(request);
const activeProcess = this.processManager.registerProcess({
type: processType,
modelName: strategy.provider,
estimatedMemoryUsage: memoryUsage,
promise: Promise.resolve(), // placeholder; lifecycle is tracked via unregisterProcess below
priority,
});
try {
const response = await this.executeWithFallback(requestId, request, context, strategy);
// Mark process as completed
this.processManager.unregisterProcess(activeProcess.id);
// Record performance metrics for adaptive tuning
const responseTime = Date.now() - startTime;
const throughput = 1; // 1 request completed
const errorRate = 0; // Successful response
adaptiveTuner.recordMetrics(responseTime, throughput, errorRate);
logger.info(`✅ Request ${requestId} completed successfully in ${responseTime}ms`);
return response;
}
catch (error) {
// Mark process as failed
this.processManager.unregisterProcess(activeProcess.id);
// Record failed request metrics
const responseTime = Date.now() - startTime;
const throughput = 0; // Failed request
const errorRate = 1; // 100% error rate for this request
adaptiveTuner.recordMetrics(responseTime, throughput, errorRate);
logger.error(`❌ Request ${requestId} failed after ${responseTime}ms:`, getErrorMessage(error));
throw toError(error);
}
}
/**
* Execute the request against each provider in the fallback chain until one
* succeeds; throws the last error if every provider fails.
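*
* @example
* // Illustrative sketch of how the chain is built from strategy.provider:
* //   'auto'      -> ['ollama', 'lm-studio', 'huggingface']
* //   'lm-studio' -> ['lm-studio', 'ollama', 'huggingface']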
*/
async executeWithFallback(requestId, request, context, strategy) {
const fallbackChain = strategy.provider === 'auto'
? ['ollama', 'lm-studio', 'huggingface'] // Default fallback chain
: [strategy.provider, 'ollama', 'lm-studio', 'huggingface'].filter((p, i, arr) => arr.indexOf(p) === i); // de-duplicate, preserving order
let lastError = null;
for (const providerType of fallbackChain) {
const provider = this.providerRepository.getProvider(providerType);
if (!provider) {
logger.warn(`Provider ${providerType} not available, skipping`);
continue;
}
try {
const metrics = {
provider: providerType,
model: provider.getModelName?.() || 'unknown',
startTime: Date.now(),
success: false,
};
this.activeRequests.set(requestId, metrics);
this.emit('requestStart', { requestId, provider: providerType });
logger.info(`🚀 Attempting request with ${providerType}`);
// Add tool integration before calling provider
const enhancedToolIntegration = getGlobalEnhancedToolIntegration();
const toolIntegration = enhancedToolIntegration || getGlobalToolIntegration();
const supportsTools = this.modelSupportsTools(providerType, provider.getModelName?.());
const tools = supportsTools && toolIntegration ? toolIntegration.getLLMFunctions() : [];
// DEBUG: Log tool integration status
logger.info('🔧 TOOL DEBUG: Request execution tool integration status', {
provider: providerType,
model: provider.getModelName?.() || 'unknown',
supportsTools,
hasEnhanced: !!enhancedToolIntegration,
hasBasic: !!getGlobalToolIntegration(),
hasIntegration: !!toolIntegration,
toolCount: tools.length,
});
if (tools.length > 0) {
logger.info('🔧 TOOL DEBUG: Available tools for request execution', {
toolNames: tools.map(t => t.function.name),
firstTool: tools[0]
});
}
else {
logger.warn('🔧 TOOL DEBUG: No tools available for request execution!');
}
// Add tools to request before calling provider
const requestWithTools = {
...request,
tools: tools
};
// Create optimized timeout for this request
const requestType = request.stream ? 'streaming' :
(request.tools && request.tools.length > 0) ? 'tool_execution' : 'regular';
const { abortController, timeout: optimizedTimeout } = requestTimeoutOptimizer.createOptimizedTimeout(requestId, requestType, providerType, strategy.timeout);
// Add abort signal to request
const requestWithAbort = {
...requestWithTools,
abortSignal: abortController.signal
};
const response = await Promise.race([
provider.processRequest(requestWithAbort, context),
this.createOptimizedTimeoutPromise(optimizedTimeout, abortController)
]);
// Mark request as completed successfully
requestTimeoutOptimizer.completeRequest(requestId);
// Check if response contains tool calls that need to be executed
if (response.toolCalls && response.toolCalls.length > 0) {
logger.debug('Tool execution: Found tool calls in request execution', {
count: response.toolCalls.length,
});
if (toolIntegration) {
try {
const toolResults = [];
// Execute each tool call
for (const toolCall of response.toolCalls) {
logger.debug('Executing tool in request execution', {
toolName: toolCall.name || toolCall.function?.name,
});
// Normalize to the { function: { name, arguments } } shape; arguments may
// arrive as an object or as a pre-serialized JSON string, so avoid
// double-encoding it
const rawArgs = toolCall.arguments ?? toolCall.function?.arguments ?? {};
const formattedToolCall = {
function: {
name: toolCall.name || toolCall.function?.name,
arguments: typeof rawArgs === 'string' ? rawArgs : JSON.stringify(rawArgs),
},
};
const result = await toolIntegration.executeToolCall(formattedToolCall);
logger.debug('Tool execution result in request execution', { result });
toolResults.push(result);
}
// If we have tool results, format them into a readable response
if (toolResults.length > 0) {
const firstResult = toolResults[0];
if (firstResult.success && firstResult.output) {
// Return the actual tool result as the content
const content = firstResult.output.content || firstResult.output;
response.content = content;
response.metadata = {
tokens: 0,
latency: 0,
...response.metadata
};
logger.info('🔧 TOOL EXECUTION: Tool successfully executed in request execution', {
toolName: response.toolCalls[0]?.name || response.toolCalls[0]?.function?.name,
resultContent: content
});
}
else if (firstResult.error) {
response.content = `Error executing tool: ${firstResult.error}`;
logger.error('Tool execution error in request execution', { error: firstResult.error });
}
}
}
catch (error) {
logger.error('Error during tool execution in request execution', { error: getErrorMessage(error) });
response.content = `Error executing tools: ${getErrorMessage(error)}`;
}
}
else {
logger.warn('Tool integration not available for tool execution in request execution');
}
}
metrics.endTime = Date.now();
metrics.success = true;
metrics.tokenCount = response.usage?.totalTokens;
this.activeRequests.delete(requestId);
this.emit('requestSuccess', {
requestId,
provider: providerType,
duration: metrics.endTime - metrics.startTime,
});
logger.info(`✅ Request ${requestId} succeeded with ${providerType} in ${metrics.endTime - metrics.startTime}ms`);
return response;
}
catch (error) {
const errorMessage = getErrorMessage(error);
lastError = toError(error);
const metrics = this.activeRequests.get(requestId);
if (metrics) {
metrics.endTime = Date.now();
metrics.error = errorMessage;
}
// Request failed - timeout optimizer handles cleanup automatically
logger.warn(`❌ ${providerType} failed for request ${requestId}: ${errorMessage}`);
this.emit('requestError', { requestId, provider: providerType, error: errorMessage });
// Continue to next provider in fallback chain
}
}
// All providers failed
this.activeRequests.delete(requestId);
const error = lastError || new Error('All providers in fallback chain failed');
throw error;
}
/**
* Determine execution strategy based on request characteristics
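*
* @example
* // Illustrative result for a short streaming request (the timeout value
* // depends on config.complexityTimeouts and is an assumption here):
* // { mode: 'fast', provider: 'auto', timeout: 5000, complexity: 'simple' }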
*/
determineExecutionStrategy(request, context) {
const prompt = request.prompt || '';
const complexity = this.assessComplexityFast(prompt);
// Default strategy
const strategy = {
mode: 'balanced',
provider: 'auto',
timeout: this.config.complexityTimeouts.medium,
complexity,
};
// Adjust based on request characteristics
if (request.stream) {
strategy.mode = 'fast';
strategy.timeout = this.config.complexityTimeouts.simple;
}
if (request.tools && request.tools.length > 0) {
strategy.provider = 'lm-studio'; // Prefer LM Studio for tool use
strategy.timeout = this.config.complexityTimeouts.complex;
}
// Adjust timeout based on complexity
switch (complexity) {
case 'simple':
strategy.timeout = this.config.complexityTimeouts.simple;
if (strategy.mode === 'balanced')
strategy.mode = 'fast';
break;
case 'complex':
strategy.timeout = this.config.complexityTimeouts.complex;
if (strategy.mode === 'fast')
strategy.mode = 'balanced';
break;
default:
strategy.timeout = this.config.complexityTimeouts.medium;
}
// Consider context if available (could be enhanced with more context analysis)
if (context && context.files && context.files.length > 10) {
// Large project context might need more time
strategy.timeout = Math.max(strategy.timeout, this.config.complexityTimeouts.complex);
}
logger.debug('Execution strategy determined:', strategy);
return strategy;
}
/**
* Get currently active requests
*/
getActiveRequests() {
return new Map(this.activeRequests);
}
/**
* Assess request complexity quickly
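*
* @example
* // assessComplexityFast('What is 2 + 2?')           -> 'simple'  (short, no keywords)
* // assessComplexityFast('Refactor the auth module') -> 'complex' (contains 'refactor')
* // assessComplexityFast(mediumPrompt)               -> 'medium'  (100–500 chars, no keywords)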
*/
assessComplexityFast(prompt) {
const length = prompt.length;
const complexKeywords = [
'analyze',
'architecture',
'security',
'performance',
'optimize',
'refactor',
];
const hasComplexKeywords = complexKeywords.some(keyword => prompt.toLowerCase().includes(keyword));
if (length < 100 && !hasComplexKeywords)
return 'simple';
if (length > 500 || hasComplexKeywords)
return 'complex';
return 'medium';
}
/**
* Estimate memory usage for request
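*
* @example
* // 2,000-char prompt with 4 top-level context keys:
* // 50 (base) + 2000 * 0.001 (prompt) + 4 * 5 (context) = 72 MB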
*/
estimateMemoryUsage(request) {
const baseUsage = 50; // MB of fixed overhead per request
const promptSize = (request.prompt?.length || 0) * 0.001; // rough heuristic: ~1 MB per 1,000 prompt characters
const contextSize = request.context ? Object.keys(request.context).length * 5 : 0; // ~5 MB per top-level context key
return Math.round(baseUsage + promptSize + contextSize);
}
/**
* Determine process type for process manager
*/
getProcessType(request) {
if (request.stream)
return 'streaming';
if (request.tools && request.tools.length > 0)
return 'generation';
return 'model_inference';
}
/**
* Get request priority
*/
getRequestPriority(request) {
if (request.stream)
return 'high'; // Streaming requests need responsiveness
if (request.tools && request.tools.length > 0)
return 'medium'; // Tool use is important
return 'low'; // Regular inference
}
/**
* Create a plain timeout promise for use with Promise.race (no abort support)
*/
createTimeoutPromise(timeoutMs) {
return new Promise((_, reject) => {
setTimeout(() => {
reject(new Error(`Request timed out after ${timeoutMs}ms`));
}, timeoutMs);
});
}
/**
* Create optimized timeout promise with abort controller
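*
* @example
* // Mirrors the Promise.race in executeWithFallback: whichever settles first
* // wins, and the loser is ignored (sketch; arguments are illustrative):
* // await Promise.race([
* //   provider.processRequest(requestWithAbort, context),
* //   this.createOptimizedTimeoutPromise(optimizedTimeout, abortController),
* // ]);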
*/
createOptimizedTimeoutPromise(timeoutMs, abortController) {
return new Promise((_, reject) => {
const timeoutId = setTimeout(() => {
// Reject before aborting: abort() fires the 'abort' listener synchronously,
// so rejecting first ensures the timeout error (not the generic abort error)
// is the one that settles this promise
reject(new Error(`Request timed out after optimized ${timeoutMs}ms`));
abortController.abort();
}, timeoutMs);
// Clear the timer and surface a distinct error if the request is aborted externally
abortController.signal.addEventListener('abort', () => {
clearTimeout(timeoutId);
reject(new Error('Request aborted'));
}, { once: true });
});
}
/**
* Generate unique request ID
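*
* @example
* // generateRequestId() -> 'req_1714070000000_k3j9x2a1b' (illustrative value)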
*/
generateRequestId() {
return `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
}
/**
* Schedule queue processor (event-driven, not infinite loop)
*/
scheduleQueueProcessor() {
if (this.isShuttingDown || this.queueProcessor) {
return;
}
this.queueProcessor = setTimeout(() => {
this.queueProcessor = null;
this.processQueueBatch();
}, 50); // Reduced from 100ms for better responsiveness
}
/**
* Process queue batch efficiently
*/
async processQueueBatch() {
if (this.isShuttingDown) {
return;
}
// Process multiple items in parallel if capacity allows
const availableSlots = this.config.maxConcurrentRequests - this.activeRequests.size;
const batchSize = Math.min(availableSlots, this.requestQueue.length, 3); // Max 3 concurrent
if (batchSize <= 0) {
// No capacity, reschedule
this.scheduleQueueProcessor();
return;
}
const batch = this.requestQueue.splice(0, batchSize);
// Process batch items concurrently
const batchPromises = batch.map(async (queueItem) => {
try {
const response = await this.processRequest(queueItem.request, queueItem.context);
queueItem.resolve(response);
}
catch (error) {
queueItem.reject(error);
}
});
// Process batch without blocking
Promise.allSettled(batchPromises);
// Reschedule if there are more items to process
if (this.requestQueue.length > 0) {
this.scheduleQueueProcessor();
}
}
/**
* Queue request if at capacity
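*
* @example
* // Illustrative: prefer queueRequest over processRequest when callers should
* // respect maxConcurrentRequests (the request fields are assumptions):
* const response = await manager.queueRequest({ prompt: 'Explain this error' }, {});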
*/
async queueRequest(request, context) {
if (this.activeRequests.size < this.config.maxConcurrentRequests) {
return this.processRequest(request, context);
}
return new Promise((resolve, reject) => {
this.requestQueue.push({ request, context, resolve, reject });
logger.info('Request queued due to capacity limit');
// Trigger queue processor
this.scheduleQueueProcessor();
});
}
/**
* Graceful shutdown
*/
async shutdown() {
logger.info('🔄 Shutting down RequestExecutionManager...');
this.isShuttingDown = true;
// Clear scheduled queue processor
if (this.queueProcessor) {
clearTimeout(this.queueProcessor);
this.queueProcessor = null;
}
// Reject any pending requests
while (this.requestQueue.length > 0) {
const queueItem = this.requestQueue.shift();
if (queueItem) {
queueItem.reject(new Error('System shutting down'));
}
}
// Wait for active requests to complete (with timeout)
const shutdownTimeout = 10000; // 10 seconds
const startTime = Date.now();
while (this.activeRequests.size > 0 && (Date.now() - startTime) < shutdownTimeout) {
await new Promise(resolve => setTimeout(resolve, 100));
}
if (this.activeRequests.size > 0) {
logger.warn(`${this.activeRequests.size} requests still active during shutdown`);
}
logger.info('✅ RequestExecutionManager shutdown complete');
}
/**
* Check if provider/model combination supports tools
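*
* @example
* // modelSupportsTools('lm-studio', 'any-model')     -> true
* // modelSupportsTools('ollama', 'qwen2.5-coder:7b') -> true  (matches 'qwen2.5')
* // modelSupportsTools('huggingface', 'gpt2')        -> false (conservative default)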
*/
modelSupportsTools(provider, model) {
if (provider === 'lm-studio') {
return true; // LM Studio generally supports OpenAI-compatible function calling
}
if (provider === 'ollama') {
// If no specific model provided, assume auto-selection will pick a supported model
if (!model) {
logger.debug('No specific model provided, assuming auto-selection will pick supported model');
return true; // Trust that auto-selection picks qwen2.5-coder which supports tools
}
// Only certain Ollama models support function calling
const modelName = model.toLowerCase();
const supportedModels = [
'llama3',
'llama3.1',
'llama3.2',
'qwen2.5',
'qwq',
'mistral',
'codellama',
];
const isSupported = supportedModels.some(supportedModel => modelName.includes(supportedModel));
logger.debug('Model tool support check', { model: modelName, isSupported });
return isSupported;
}
return false; // Conservative default - no tools for unknown providers
}
/**
* Get execution statistics
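*
* @example
* // Illustrative return value (numbers are assumptions):
* // { activeRequests: 2, queuedRequests: 5, maxConcurrent: 3, config: { ... } }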
*/
getExecutionStats() {
return {
activeRequests: this.activeRequests.size,
queuedRequests: this.requestQueue.length,
maxConcurrent: this.config.maxConcurrentRequests,
config: this.config,
};
}
}
//# sourceMappingURL=request-execution-manager.js.map