codecrucible-synth
Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
/**
* Request Execution Manager - Extracted from UnifiedModelClient
* Manages request processing, execution strategies, and fallback logic following Living Spiral methodology
*
* Council Perspectives Applied:
* - Performance Engineer: Optimized execution strategies and timeout handling
* - Maintainer: Reliable request processing and error recovery
* - Security Guardian: Safe request validation and execution sandboxing
* - Explorer: Flexible execution modes and strategy adaptation
* - Architect: Clean separation between strategy, execution, and coordination
*/
import { EventEmitter } from 'events';
import { logger } from '../logger.js';
import { getErrorMessage, toError } from '../../utils/error-utils.js';
import {
ProjectContext,
ModelRequest,
ModelResponse,
ComplexityAnalysis,
TaskType,
} from '../types.js';
import { ActiveProcess, ActiveProcessManager } from '../performance/active-process-manager.js';
import { getGlobalEnhancedToolIntegration } from '../tools/enhanced-tool-integration.js';
import { getGlobalToolIntegration } from '../tools/tool-integration.js';
import { requestBatcher } from '../performance/intelligent-request-batcher.js';
import { adaptiveTuner } from '../performance/adaptive-performance-tuner.js';
import { requestTimeoutOptimizer } from '../performance/request-timeout-optimizer.js';
export type ExecutionMode = 'fast' | 'quality' | 'balanced';
export type ProviderType = 'ollama' | 'lm-studio' | 'huggingface' | 'auto';
export interface ExecutionStrategy {
mode: ExecutionMode;
provider: ProviderType;
timeout: number;
complexity: string;
}
export interface RequestMetrics {
provider: ProviderType;
model: string;
startTime: number;
endTime?: number;
success: boolean;
tokenCount?: number;
error?: string;
}
export interface ExecutionConfig {
maxConcurrentRequests: number;
defaultTimeout: number;
complexityTimeouts: {
simple: number;
medium: number;
complex: number;
};
memoryThresholds: {
low: number;
medium: number;
high: number;
};
}
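// A minimal illustrative ExecutionConfig (values are assumptions, not the
// package's defaults). Timeouts are interpreted as milliseconds by the
// timeout helpers below; memory figures are in MB per estimateMemoryUsage().
//
// const exampleConfig: ExecutionConfig = {
//   maxConcurrentRequests: 3,
//   defaultTimeout: 30_000,
//   complexityTimeouts: { simple: 10_000, medium: 30_000, complex: 120_000 },
//   memoryThresholds: { low: 512, medium: 1024, high: 2048 },
// };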
export interface IRequestExecutionManager {
processRequest(request: ModelRequest, context?: ProjectContext): Promise<ModelResponse>;
executeWithFallback(
requestId: string,
request: ModelRequest,
context: ProjectContext | undefined,
strategy: ExecutionStrategy,
abortSignal?: AbortSignal
): Promise<ModelResponse>;
determineExecutionStrategy(request: ModelRequest, context?: ProjectContext): ExecutionStrategy;
getActiveRequests(): Map<string, RequestMetrics>;
}
export class RequestExecutionManager extends EventEmitter implements IRequestExecutionManager {
private config: ExecutionConfig;
private activeRequests = new Map<string, RequestMetrics>();
private requestQueue: Array<{
request: ModelRequest;
context?: ProjectContext;
resolve: (value: ModelResponse) => void;
reject: (reason?: any) => void;
}> = [];
private processManager: ActiveProcessManager;
private providerRepository: any;
private isShuttingDown = false;
private queueProcessor: NodeJS.Timeout | null = null;
constructor(
config: ExecutionConfig,
processManager: ActiveProcessManager,
providerRepository: any
) {
super();
this.config = config;
this.processManager = processManager;
this.providerRepository = providerRepository;
// Start event-driven queue processor instead of infinite loop
this.scheduleQueueProcessor();
// Handle shutdown gracefully
process.once('SIGTERM', () => this.shutdown());
process.once('SIGINT', () => this.shutdown());
}
/**
* Main request processing method
*/
async processRequest(request: ModelRequest, context?: ProjectContext): Promise<ModelResponse> {
const requestId = this.generateRequestId();
const strategy = this.determineExecutionStrategy(request, context);
const startTime = Date.now();
logger.info(
`🚀 Processing request ${requestId} with ${strategy.mode} mode via ${strategy.provider}`
);
// Check if request is eligible for batching (non-streaming, non-tool requests)
const isBatchEligible = !request.stream &&
(!request.tools || request.tools.length === 0) &&
strategy.mode !== 'fast'; // Fast mode bypasses batching
if (isBatchEligible) {
logger.debug(`Request ${requestId} eligible for batching`);
try {
// Use intelligent batching for similar requests
const batchResult = await requestBatcher.batchRequest(
request.prompt,
strategy.provider,
strategy.provider,
{
temperature: request.temperature,
maxTokens: request.maxTokens,
mode: strategy.mode,
priority: this.getRequestPriority(request)
},
request.tools
);
// Record performance metrics for adaptive tuning
const responseTime = Date.now() - startTime;
const errorRate = 0; // Successful batch response
adaptiveTuner.recordMetrics(responseTime, 1, errorRate);
logger.info(`✅ Request ${requestId} completed via batching in ${responseTime}ms`);
return {
content: batchResult.content,
model: strategy.provider,
provider: strategy.provider,
metadata: {
tokens: batchResult.usage?.totalTokens || 0,
latency: responseTime,
selectedProvider: strategy.provider,
fromBatch: true
}
};
} catch (error) {
logger.warn(`Batching failed for ${requestId}, falling back to individual processing:`, error);
// Fall through to normal processing
}
}
// Register process with process manager
const processType = this.getProcessType(request);
const priority = this.getRequestPriority(request);
const memoryUsage = this.estimateMemoryUsage(request);
const activeProcess = this.processManager.registerProcess({
type: processType,
modelName: strategy.provider,
estimatedMemoryUsage: memoryUsage,
promise: Promise.resolve(),
priority,
});
try {
const response = await this.executeWithFallback(requestId, request, context, strategy);
// Mark process as completed
this.processManager.unregisterProcess(activeProcess.id);
// Record performance metrics for adaptive tuning
const responseTime = Date.now() - startTime;
const throughput = 1; // 1 request completed
const errorRate = 0; // Successful response
adaptiveTuner.recordMetrics(responseTime, throughput, errorRate);
logger.info(`✅ Request ${requestId} completed successfully in ${responseTime}ms`);
return response;
} catch (error: unknown) {
// Mark process as failed
this.processManager.unregisterProcess(activeProcess.id);
// Record failed request metrics
const responseTime = Date.now() - startTime;
const throughput = 0; // Failed request
const errorRate = 1; // 100% error rate for this request
adaptiveTuner.recordMetrics(responseTime, throughput, errorRate);
logger.error(`❌ Request ${requestId} failed after ${responseTime}ms:`, getErrorMessage(error));
throw toError(error);
}
}
/**
* Execute request with fallback chain
*/
async executeWithFallback(
requestId: string,
request: ModelRequest,
context: ProjectContext | undefined,
strategy: ExecutionStrategy,
abortSignal?: AbortSignal
): Promise<ModelResponse> {
const fallbackChain =
strategy.provider === 'auto'
? ['ollama', 'lm-studio', 'huggingface'] // Default fallback chain
: [strategy.provider, 'ollama', 'lm-studio', 'huggingface'].filter(
(p, i, arr) => arr.indexOf(p) === i
);
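// e.g. strategy.provider === 'lm-studio' yields
// ['lm-studio', 'ollama', 'huggingface']; the filter drops duplicates while
// preserving order, so the preferred provider is always tried first.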
let lastError: Error | null = null;
for (const providerType of fallbackChain) {
const provider = this.providerRepository.getProvider(providerType);
if (!provider) {
logger.warn(`Provider ${providerType} not available, skipping`);
continue;
}
try {
const metrics: RequestMetrics = {
provider: providerType as ProviderType,
model: provider.getModelName?.() || 'unknown',
startTime: Date.now(),
success: false,
};
this.activeRequests.set(requestId, metrics);
this.emit('requestStart', { requestId, provider: providerType });
logger.info(`🚀 Attempting request with ${providerType}`);
// Add tool integration before calling provider
const enhancedToolIntegration = getGlobalEnhancedToolIntegration();
const toolIntegration = enhancedToolIntegration || getGlobalToolIntegration();
const supportsTools = this.modelSupportsTools(
providerType as ProviderType,
provider.getModelName?.()
);
const tools = supportsTools && toolIntegration ? toolIntegration.getLLMFunctions() : [];
// DEBUG: Log tool integration status
logger.info('🔧 TOOL DEBUG: Request execution tool integration status', {
provider: providerType,
model: provider.getModelName?.() || 'unknown',
supportsTools,
hasEnhanced: !!enhancedToolIntegration,
hasBasic: !!getGlobalToolIntegration(),
hasIntegration: !!toolIntegration,
toolCount: tools.length,
});
if (tools.length > 0) {
logger.info('🔧 TOOL DEBUG: Available tools for request execution', {
toolNames: tools.map(t => t.function.name),
firstTool: tools[0]
});
} else {
logger.warn('🔧 TOOL DEBUG: No tools available for request execution!');
}
// Add tools to request before calling provider
const requestWithTools = { ...request, tools };
// Create optimized timeout for this request
const requestType = request.stream ? 'streaming' :
(request.tools && request.tools.length > 0) ? 'tool_execution' : 'regular';
const { abortController, timeout: optimizedTimeout } = requestTimeoutOptimizer.createOptimizedTimeout(
requestId,
requestType,
providerType,
strategy.timeout
);
// Add abort signal to request
const requestWithAbort = {
...requestWithTools,
abortSignal: abortController.signal
};
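// Race the provider call against the optimized timeout: on timeout the
// controller aborts, which the provider can observe via abortSignal.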
const response = await Promise.race([
provider.processRequest(requestWithAbort, context),
this.createOptimizedTimeoutPromise(optimizedTimeout, abortController)
]);
// Mark request as completed successfully
requestTimeoutOptimizer.completeRequest(requestId);
// Check if response contains tool calls that need to be executed
if (response.toolCalls && response.toolCalls.length > 0) {
logger.debug('Tool execution: Found tool calls in request execution', {
count: response.toolCalls.length,
});
if (toolIntegration) {
try {
const toolResults = [];
// Execute each tool call
for (const toolCall of response.toolCalls) {
logger.debug('Executing tool in request execution', {
toolName: toolCall.name || toolCall.function?.name,
});
// Convert to expected format if needed
const formattedToolCall = {
function: {
name: toolCall.name || toolCall.function?.name,
arguments: JSON.stringify(
toolCall.arguments || toolCall.function?.arguments || {}
),
},
};
const result = await toolIntegration.executeToolCall(formattedToolCall);
logger.debug('Tool execution result in request execution', { result });
toolResults.push(result);
}
// If we have tool results, format them into a readable response
if (toolResults.length > 0) {
const firstResult = toolResults[0];
if (firstResult.success && firstResult.output) {
// Return the actual tool result as the content
const content = firstResult.output.content || firstResult.output;
response.content = content;
response.metadata = {
tokens: 0,
latency: 0,
...response.metadata
};
logger.info('🔧 TOOL EXECUTION: Tool successfully executed in request execution', {
toolName: response.toolCalls[0]?.name || response.toolCalls[0]?.function?.name,
resultContent: content
});
} else if (firstResult.error) {
response.content = `Error executing tool: ${firstResult.error}`;
logger.error('Tool execution error in request execution', { error: firstResult.error });
}
}
} catch (error) {
logger.error('Error during tool execution in request execution', { error: getErrorMessage(error) });
response.content = `Error executing tools: ${getErrorMessage(error)}`;
}
} else {
logger.warn('Tool integration not available for tool execution in request execution');
}
}
metrics.endTime = Date.now();
metrics.success = true;
metrics.tokenCount = response.usage?.totalTokens;
this.activeRequests.delete(requestId);
this.emit('requestSuccess', {
requestId,
provider: providerType,
duration: metrics.endTime - metrics.startTime,
});
logger.info(
`✅ Request ${requestId} succeeded with ${providerType} in ${metrics.endTime - metrics.startTime}ms`
);
return response;
} catch (error: unknown) {
const errorMessage = getErrorMessage(error);
lastError = toError(error);
const metrics = this.activeRequests.get(requestId);
if (metrics) {
metrics.endTime = Date.now();
metrics.error = errorMessage;
}
// Request failed - timeout optimizer handles cleanup automatically
logger.warn(`❌ ${providerType} failed for request ${requestId}: ${errorMessage}`);
this.emit('requestError', { requestId, provider: providerType, error: errorMessage });
// Continue to next provider in fallback chain
}
}
// All providers failed
this.activeRequests.delete(requestId);
const error = lastError || new Error('All providers in fallback chain failed');
throw error;
}
/**
* Determine execution strategy based on request characteristics
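* (e.g. a short streaming request with no tools resolves to mode 'fast' with
* the 'simple' timeout, while tool-bearing requests prefer LM Studio and get
* the 'complex' timeout).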
*/
determineExecutionStrategy(request: ModelRequest, context?: ProjectContext): ExecutionStrategy {
const prompt = request.prompt || '';
const complexity = this.assessComplexityFast(prompt);
// Default strategy
const strategy: ExecutionStrategy = {
mode: 'balanced',
provider: 'auto',
timeout: this.config.complexityTimeouts.medium,
complexity,
};
// Adjust based on request characteristics
if (request.stream) {
strategy.mode = 'fast';
strategy.timeout = this.config.complexityTimeouts.simple;
}
if (request.tools && request.tools.length > 0) {
strategy.provider = 'lm-studio'; // Prefer LM Studio for tool use
strategy.timeout = this.config.complexityTimeouts.complex;
}
// Adjust timeout based on complexity
switch (complexity) {
case 'simple':
strategy.timeout = this.config.complexityTimeouts.simple;
if (strategy.mode === 'balanced') strategy.mode = 'fast';
break;
case 'complex':
strategy.timeout = this.config.complexityTimeouts.complex;
if (strategy.mode === 'fast') strategy.mode = 'balanced';
break;
default:
strategy.timeout = this.config.complexityTimeouts.medium;
}
// Consider context if available (could be enhanced with more context analysis)
if (context && context.files && context.files.length > 10) {
// Large project context might need more time
strategy.timeout = Math.max(strategy.timeout, this.config.complexityTimeouts.complex);
}
logger.debug('Execution strategy determined:', strategy);
return strategy;
}
/**
* Get currently active requests
*/
getActiveRequests(): Map<string, RequestMetrics> {
return new Map(this.activeRequests);
}
/**
* Assess request complexity quickly
*/
private assessComplexityFast(prompt: string): 'simple' | 'medium' | 'complex' {
const length = prompt.length;
const complexKeywords = [
'analyze',
'architecture',
'security',
'performance',
'optimize',
'refactor',
];
const hasComplexKeywords = complexKeywords.some(keyword =>
prompt.toLowerCase().includes(keyword)
);
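// Classification bands: under 100 chars with no complex keywords -> 'simple';
// over 500 chars or any complex keyword -> 'complex'; otherwise 'medium'.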
if (length < 100 && !hasComplexKeywords) return 'simple';
if (length > 500 || hasComplexKeywords) return 'complex';
return 'medium';
}
/**
* Estimate memory usage for request
*/
private estimateMemoryUsage(request: ModelRequest): number {
const baseUsage = 50; // MB base usage
const promptSize = (request.prompt?.length || 0) * 0.001; // Rough estimate
const contextSize = request.context ? Object.keys(request.context).length * 5 : 0;
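// i.e. ~1 MB per 1,000 prompt characters plus a flat 5 MB per context key;
// a coarse heuristic, not a measured footprint.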
return Math.round(baseUsage + promptSize + contextSize);
}
/**
* Determine process type for process manager
*/
private getProcessType(
request: ModelRequest
): 'model_inference' | 'analysis' | 'generation' | 'streaming' {
if (request.stream) return 'streaming';
if (request.tools && request.tools.length > 0) return 'generation';
return 'model_inference';
}
/**
* Get request priority
*/
private getRequestPriority(request: ModelRequest): ActiveProcess['priority'] {
if (request.stream) return 'high'; // Streaming requests need responsiveness
if (request.tools && request.tools.length > 0) return 'medium'; // Tool use is important
return 'low'; // Regular inference
}
/**
* Create timeout promise for race conditions
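* (Note: not currently called within this class; executeWithFallback races
* against createOptimizedTimeoutPromise below, which also aborts the request.)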
*/
private createTimeoutPromise(timeoutMs: number): Promise<never> {
return new Promise((_, reject) => {
setTimeout(() => {
reject(new Error(`Request timed out after ${timeoutMs}ms`));
}, timeoutMs);
});
}
/**
* Create optimized timeout promise with abort controller
*/
private createOptimizedTimeoutPromise(timeoutMs: number, abortController: AbortController): Promise<never> {
return new Promise((_, reject) => {
const timeoutId = setTimeout(() => {
abortController.abort();
reject(new Error(`Request timed out after optimized ${timeoutMs}ms`));
}, timeoutMs);
// Clear timeout if request is aborted externally
abortController.signal.addEventListener('abort', () => {
clearTimeout(timeoutId);
reject(new Error('Request aborted'));
});
});
}
/**
* Generate unique request ID
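* e.g. `req_1712345678901_k3j9x2a1b` (epoch millis + 9-char base-36 suffix)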
*/
private generateRequestId(): string {
return `req_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
}
/**
* Schedule queue processor (event-driven, not infinite loop)
*/
private scheduleQueueProcessor(): void {
if (this.isShuttingDown || this.queueProcessor) {
return;
}
this.queueProcessor = setTimeout(() => {
this.queueProcessor = null;
this.processQueueBatch();
}, 50); // Reduced from 100ms for better responsiveness
}
/**
* Process queue batch efficiently
*/
private async processQueueBatch(): Promise<void> {
if (this.isShuttingDown) {
return;
}
// Process multiple items in parallel if capacity allows
const availableSlots = this.config.maxConcurrentRequests - this.activeRequests.size;
const batchSize = Math.min(availableSlots, this.requestQueue.length, 3); // Max 3 concurrent
if (batchSize <= 0) {
// No capacity: poll again only while work remains queued. An empty queue
// is re-activated by queueRequest(), keeping this truly event-driven
// rather than a perpetual 50ms timer.
if (this.requestQueue.length > 0) {
this.scheduleQueueProcessor();
}
return;
}
const batch = this.requestQueue.splice(0, batchSize);
// Process batch items concurrently
const batchPromises = batch.map(async (queueItem) => {
try {
const response = await this.processRequest(queueItem.request, queueItem.context);
queueItem.resolve(response);
} catch (error) {
queueItem.reject(error);
}
});
// Process batch without blocking
void Promise.allSettled(batchPromises);
// Reschedule if there are more items to process
if (this.requestQueue.length > 0) {
this.scheduleQueueProcessor();
}
}
/**
* Queue request if at capacity
*/
async queueRequest(request: ModelRequest, context?: ProjectContext): Promise<ModelResponse> {
if (this.activeRequests.size < this.config.maxConcurrentRequests) {
return this.processRequest(request, context);
}
return new Promise<ModelResponse>((resolve, reject) => {
this.requestQueue.push({
request,
context,
resolve: resolve as (value: ModelResponse) => void,
reject: reject as (reason?: any) => void,
});
logger.info('Request queued due to capacity limit');
// Trigger queue processor
this.scheduleQueueProcessor();
});
}
/**
* Graceful shutdown
*/
async shutdown(): Promise<void> {
logger.info('🔄 Shutting down RequestExecutionManager...');
this.isShuttingDown = true;
// Clear scheduled queue processor
if (this.queueProcessor) {
clearTimeout(this.queueProcessor);
this.queueProcessor = null;
}
// Reject any pending requests
while (this.requestQueue.length > 0) {
const queueItem = this.requestQueue.shift();
if (queueItem) {
queueItem.reject(new Error('System shutting down'));
}
}
// Wait for active requests to complete (with timeout)
const shutdownTimeout = 10000; // 10 seconds
const startTime = Date.now();
while (this.activeRequests.size > 0 && (Date.now() - startTime) < shutdownTimeout) {
await new Promise(resolve => setTimeout(resolve, 100));
}
if (this.activeRequests.size > 0) {
logger.warn(`${this.activeRequests.size} requests still active during shutdown`);
}
logger.info('✅ RequestExecutionManager shutdown complete');
}
/**
* Check if provider/model combination supports tools
*/
private modelSupportsTools(provider: ProviderType, model?: string): boolean {
if (provider === 'lm-studio') {
return true; // LM Studio generally supports OpenAI-compatible function calling
}
if (provider === 'ollama') {
// If no specific model provided, assume auto-selection will pick a supported model
if (!model) {
logger.debug(
'No specific model provided, assuming auto-selection will pick supported model'
);
return true; // Trust that auto-selection picks qwen2.5-coder which supports tools
}
// Only certain Ollama models support function calling
const modelName = model.toLowerCase();
const supportedModels = [
'llama3',
'llama3.1',
'llama3.2',
'qwen2.5',
'qwq',
'mistral',
'codellama',
];
const isSupported = supportedModels.some(supportedModel =>
modelName.includes(supportedModel)
);
logger.debug('Model tool support check', { model: modelName, isSupported });
return isSupported;
}
return false; // Conservative default - no tools for unknown providers
}
/**
* Get execution statistics
*/
getExecutionStats(): {
activeRequests: number;
queuedRequests: number;
maxConcurrent: number;
config: ExecutionConfig;
} {
return {
activeRequests: this.activeRequests.size,
queuedRequests: this.requestQueue.length,
maxConcurrent: this.config.maxConcurrentRequests,
config: this.config,
};
}
}
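/*
 * Usage sketch (illustrative only): wiring of ActiveProcessManager and the
 * provider repository is application-specific, and `createProcessManager`
 * and `providers` below are hypothetical stand-ins. Assumes ModelRequest
 * accepts a bare prompt.
 *
 * const manager = new RequestExecutionManager(
 *   {
 *     maxConcurrentRequests: 3,
 *     defaultTimeout: 30_000,
 *     complexityTimeouts: { simple: 10_000, medium: 30_000, complex: 120_000 },
 *     memoryThresholds: { low: 512, medium: 1024, high: 2048 },
 *   },
 *   createProcessManager(),
 *   providers
 * );
 * const response = await manager.queueRequest({ prompt: 'Summarize this repo' });
 * console.log(response.content);
 */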