codecrucible-synth

Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability

import axios, { AxiosInstance } from 'axios';
import http from 'http';
import https from 'https';
import { logger } from '../core/logger.js';
import { readFileSync, existsSync } from 'fs';
import { load as loadYaml } from 'js-yaml';
import { join } from 'path';
import { getErrorMessage } from '../utils/error-utils.js';
import { responseCache } from '../core/performance/response-cache-manager.js';
import { modelPreloader } from '../core/performance/model-preloader.js';

export interface OllamaConfig {
  endpoint?: string;
  model?: string;
  timeout?: number;
}

export class OllamaProvider {
  private httpClient: AxiosInstance;
  private config: OllamaConfig;
  private model: string;
  private isAvailable: boolean = false;

  constructor(config: OllamaConfig) {
    logger.debug('OllamaProvider constructor called', { config });

    this.config = {
      endpoint: config.endpoint || 'http://localhost:11434',
      model: config.model, // Will be set by autonomous detection
      timeout: config.timeout || 30000, // Reduced timeout for better responsiveness
    };

    this.model = this.config.model || 'auto-detect'; // Mark for autonomous detection
    logger.debug('OllamaProvider model state', { model: this.model });

    // Create HTTP agents with connection pooling for performance optimization
    const httpAgent = new http.Agent({
      keepAlive: true,
      maxSockets: 10, // Max concurrent connections
      maxFreeSockets: 5, // Keep 5 idle connections
      timeout: 5000, // Socket timeout
    });
    const httpsAgent = new https.Agent({
      keepAlive: true,
      maxSockets: 10,
      maxFreeSockets: 5,
      timeout: 5000,
    });

    this.httpClient = axios.create({
      baseURL: this.config.endpoint,
      timeout: this.config.timeout,
      httpAgent, // Connection pooling for HTTP
      httpsAgent, // Connection pooling for HTTPS
      headers: {
        'Content-Type': 'application/json',
        Connection: 'keep-alive', // Explicitly request keep-alive
      },
    });

    logger.debug('Ollama HTTP connection pooling configured', { maxSockets: 10, keepAlive: true });
  }

  async processRequest(request: any, _context?: any): Promise<any> {
    logger.debug('OllamaProvider.processRequest called');
    logger.debug('OllamaProvider request object', {
      prompt: `${(request.prompt || '').substring(0, 200)}...`,
      model: request.model,
      temperature: request.temperature,
      maxTokens: request.maxTokens,
      hasTools: !!request.tools?.length,
    });

    try {
      // Check status and perform autonomous model detection
      logger.debug('Checking Ollama availability and detecting models');
      if (!this.isAvailable || this.model === 'auto-detect') {
        logger.debug('Performing model detection and status check');
        await this.checkStatus();
      }

      logger.debug('Ollama is available', { model: this.model });

      const result = await this.generate(request);
      logger.debug('OllamaProvider response received', { contentLength: result?.content?.length });
      return result;
    } catch (error: unknown) {
      logger.error('OllamaProvider error', error);
      throw error;
    }
  }

  async generate(request: any): Promise<any> {
    const startTime = Date.now();
    logger.debug('OllamaProvider generate method started');

    // Check cache first (skip for streaming and function calls with dynamic data)
    const shouldCache = !request.stream && (!request.tools || request.tools.length === 0);
    if (shouldCache) {
      const cachedResponse = responseCache.get(
        request.prompt,
        this.model,
        'ollama',
        request.tools
      );
      if (cachedResponse) {
        const responseTime = Date.now() - startTime;
        logger.debug('Using cached response', {
          hitCount: cachedResponse.metadata.hitCount,
          cacheAge: Date.now() - cachedResponse.metadata.timestamp,
          responseTime,
        });

        // Record cache hit as successful usage
        modelPreloader.recordModelUsage(this.model, 'ollama', responseTime, true);

        return {
          content: cachedResponse.response.content,
          finishReason: cachedResponse.response.finishReason || 'cached',
          usage: cachedResponse.response.usage,
          fromCache: true,
        };
      }
    }

    // Check if model is warmed up
    const isWarmed = modelPreloader.isModelWarmed(this.model, 'ollama');
    if (isWarmed) {
      logger.debug(`Using pre-warmed model: ${this.model}`);
    }

    // Use external AbortSignal if provided, otherwise create our own
    const externalAbortSignal = request.abortSignal;
    const abortController = externalAbortSignal ? undefined : new AbortController();
    const effectiveAbortSignal = externalAbortSignal || abortController?.signal;

    let timeoutId: NodeJS.Timeout | undefined;
    if (!externalAbortSignal && abortController) {
      timeoutId = setTimeout(() => {
        logger.debug('Ollama request timeout, aborting');
        abortController.abort();
      }, this.config.timeout || 30000);
    }

    try {
      const hasTools = request.tools && request.tools.length > 0;
      const endpoint = hasTools ? '/api/chat' : '/api/generate';

      let requestBody: any;
      if (hasTools) {
        // Use chat format for function calling (required for tools)
        requestBody = {
          model: this.model,
          messages: [{ role: 'user', content: request.prompt }],
          tools: request.tools,
          stream: false,
          options: {
            temperature: request.temperature || 0.1,
            num_predict: request.maxTokens || 2048,
            top_p: 0.9,
            top_k: 40,
            ...this.getGPUConfig(),
          },
        };

        logger.debug('Sending function-calling request to Ollama API', {
          url: `${this.config.endpoint}${endpoint}`,
          model: this.model,
          toolCount: request.tools.length,
          messageContent: `${request.prompt?.substring(0, 100)}...`,
        });
      } else {
        // Use generate format for regular text
        requestBody = {
          model: this.model,
          prompt: request.prompt,
          stream: false,
          options: {
            temperature: request.temperature || 0.1,
            num_predict: request.maxTokens || 2048,
            top_p: 0.9,
            top_k: 40,
            ...this.getGPUConfig(),
          },
        };

        logger.debug('Sending regular request to Ollama API', {
          url: `${this.config.endpoint}${endpoint}`,
          model: this.model,
          promptLength: request.prompt?.length || 0,
        });
      }

      const response = await this.httpClient.post(endpoint, requestBody, {
        signal: effectiveAbortSignal,
        timeout: this.config.timeout,
      });

      if (timeoutId) clearTimeout(timeoutId);
      logger.debug('Received response from Ollama API');

      const responseData = response.data;

      // Handle chat response format (with tools)
      if (responseData.message) {
        const message = responseData.message;

        if (message.tool_calls && message.tool_calls.length > 0) {
          logger.debug('Ollama tool calls detected (chat format)', {
            toolCallCount: message.tool_calls.length,
            toolNames: message.tool_calls.map((call: any) => call.function?.name),
            messageContent: `${message.content?.substring(0, 100)}...`,
          });

          return {
            content: message.content || '',
            toolCalls: message.tool_calls.map((call: any) => ({
              name: call.function?.name,
              function: call.function,
              arguments:
                typeof call.function?.arguments === 'string'
                  ? JSON.parse(call.function.arguments)
                  : call.function?.arguments,
            })),
            usage: {
              totalTokens: (responseData.prompt_eval_count || 0) + (responseData.eval_count || 0),
              promptTokens: responseData.prompt_eval_count || 0,
              completionTokens: responseData.eval_count || 0,
            },
            done: responseData.done ?? true,
            model: this.model,
            provider: 'ollama',
          };
        }

        // Regular chat response without tools
        const chatResponse = {
          content: message.content || '',
          usage: {
            totalTokens: (responseData.prompt_eval_count || 0) + (responseData.eval_count || 0),
            promptTokens: responseData.prompt_eval_count || 0,
            completionTokens: responseData.eval_count || 0,
          },
          done: responseData.done ?? true,
          model: this.model,
          provider: 'ollama',
        };

        // Store in cache if caching is enabled
        if (shouldCache && chatResponse.content) {
          responseCache.set(
            request.prompt,
            this.model,
            'ollama',
            { content: chatResponse.content, usage: chatResponse.usage, finishReason: 'stop' },
            request.tools
          );
        }

        // Record performance metrics
        const responseTime = Date.now() - startTime;
        modelPreloader.recordModelUsage(this.model, 'ollama', responseTime, true);

        return chatResponse;
      }

      // Handle generate response format (no tools) - LEGACY FORMAT
      if (responseData.tool_calls && responseData.tool_calls.length > 0) {
        logger.debug('Ollama tool calls detected (generate format)', {
          toolCallCount: responseData.tool_calls.length,
          toolNames: responseData.tool_calls.map((call: any) => call.function?.name),
        });

        return {
          content: responseData.response || '',
          toolCalls: responseData.tool_calls.map((call: any) => ({
            name: call.function?.name,
            function: call.function,
            arguments: call.function?.arguments,
          })),
          usage: {
            totalTokens: (responseData.prompt_eval_count || 0) + (responseData.eval_count || 0),
            promptTokens: responseData.prompt_eval_count || 0,
            completionTokens: responseData.eval_count || 0,
          },
          done: responseData.done ?? true,
          model: this.model,
          provider: 'ollama',
        };
      }

      // Regular generate response without tools
      const finalResponse = {
        content: responseData.response || '',
        usage: {
          totalTokens: (responseData.prompt_eval_count || 0) + (responseData.eval_count || 0),
          promptTokens: responseData.prompt_eval_count || 0,
          completionTokens: responseData.eval_count || 0,
        },
        done: responseData.done ?? true,
        model: this.model,
        provider: 'ollama',
      };

      // Store in cache if caching is enabled
      if (shouldCache && finalResponse.content) {
        responseCache.set(
          request.prompt,
          this.model,
          'ollama',
          { content: finalResponse.content, usage: finalResponse.usage, finishReason: 'stop' },
          request.tools
        );
      }

      // Record performance metrics
      const responseTime = Date.now() - startTime;
      modelPreloader.recordModelUsage(this.model, 'ollama', responseTime, true);

      return finalResponse;
    } catch (error: unknown) {
      if (timeoutId) clearTimeout(timeoutId);
      logger.error('Ollama API error', error);

      // Record failed request metrics
      const responseTime = Date.now() - startTime;
      modelPreloader.recordModelUsage(this.model, 'ollama', responseTime, false);

      if ((error as any)?.name === 'AbortError') {
        throw new Error('Ollama API request timed out');
      }

      // Fallback response for debugging
      if ((error as any)?.code === 'ECONNREFUSED') {
        logger.warn('Ollama not available, using fallback response');
        return {
          content: `I understand you want to work with this code, but I'm unable to connect to Ollama right now.
The request was: "${request.prompt?.substring(0, 100)}..."`,
          usage: { totalTokens: 20, promptTokens: 10, completionTokens: 10 },
          done: true,
          model: this.model,
          provider: 'ollama-fallback',
        };
      }

      throw error;
    }
  }

  async checkStatus(): Promise<boolean> {
    logger.debug('Checking Ollama status');

    const abortController = new AbortController();
    const timeoutId = setTimeout(() => {
      logger.debug('Status check timeout, aborting');
      abortController.abort();
    }, 5000); // Shorter timeout for status checks

    try {
      const response = await this.httpClient.get('/api/tags', {
        signal: abortController.signal,
        timeout: 5000,
      });
      clearTimeout(timeoutId);

      if (response.data?.models) {
        const availableModels = response.data.models.map((m: any) => m.name);
        logger.debug('Available Ollama models', { availableModels });

        // Set model based on configuration or auto-detect capabilities
        if (!this.model || this.model === 'auto-detect') {
          this.model = await this.selectOptimalModel(availableModels);
          logger.debug('Selected model', { model: this.model });
        }

        this.isAvailable = true;
        return true;
      }

      return false;
    } catch (error: unknown) {
      clearTimeout(timeoutId);
      logger.warn('Ollama not available', error);
      this.isAvailable = false;

      // Set fallback model for offline scenarios
      if (!this.model || this.model === 'auto-detect') {
        this.model = 'auto-detect'; // Will be resolved when connection restored
      }

      return false;
    }
  }

  async listModels(): Promise<string[]> {
    logger.debug('Listing Ollama models');

    const abortController = new AbortController();
    const timeoutId = setTimeout(() => {
      abortController.abort();
    }, 5000);

    try {
      const response = await this.httpClient.get('/api/tags', {
        signal: abortController.signal,
        timeout: 5000,
      });
      clearTimeout(timeoutId);

      if (response.data?.models) {
        const models = response.data.models.map((m: any) => m.name);
        logger.debug('Found models', { models });
        return models;
      }

      return [];
    } catch (error: unknown) {
      clearTimeout(timeoutId);
      logger.warn('Could not list models', error);
      return []; // Return empty array to force re-detection
    }
  }

  /**
   * Dynamically select the optimal model based on capabilities and requirements.
   * Prioritizes function-calling capable models when tools are needed.
   */
  private async selectOptimalModel(availableModels: string[]): Promise<string> {
    if (availableModels.length === 0) {
      logger.warn('No models available, using fallback');
      return 'auto-detect';
    }

    // Detect function-calling capabilities by model patterns and test if needed
    const functionCallingModels = this.detectFunctionCallingModels(availableModels);
    const codingModels = this.detectCodingModels(availableModels);
    const generalModels = this.detectGeneralPurposeModels(availableModels);

    // Prioritize function-calling models when tools are available
    if (functionCallingModels.length > 0) {
      const selected = functionCallingModels[0];
      logger.info(`🛠️ Selected function-calling capable model: ${selected}`);
      return selected;
    }

    // Fall back to coding models
    if (codingModels.length > 0) {
      const selected = codingModels[0];
      logger.info(`💻 Selected coding model: ${selected} (limited tool support)`);
      return selected;
    }

    // Final fallback to any general model
    if (generalModels.length > 0) {
      const selected = generalModels[0];
      logger.info(`🤖 Selected general model: ${selected} (no tool support)`);
      return selected;
    }

    // Use the first available model as a last resort
    const selected = availableModels[0];
    logger.info(`⚠️ Using first available model: ${selected} (capabilities unknown)`);
    return selected;
  }

  /**
   * Detect models with function-calling capabilities based on known patterns
   */
  private detectFunctionCallingModels(models: string[]): string[] {
    const functionCallingPatterns = [
      /^llama3\.1/i, // Llama 3.1 series has strong function calling
      /^mistral.*instruct/i, // Mistral Instruct models support tools
      /^qwen2\.5.*instruct/i, // Qwen 2.5 Instruct models
      /^codellama.*instruct/i, // CodeLlama Instruct
      /^openchat/i, // OpenChat models
      /^hermes/i, // Nous Hermes series
      /^yi.*chat/i, // Yi Chat models
    ];

    return models
      .filter(model => functionCallingPatterns.some(pattern => pattern.test(model)))
      .sort((a, b) => {
        // Prefer larger models (assuming better capabilities)
        const sizeA = this.extractModelSize(a);
        const sizeB = this.extractModelSize(b);
        return sizeB - sizeA;
      });
  }

  /**
   * Detect coding-focused models
   */
  private detectCodingModels(models: string[]): string[] {
    const codingPatterns = [
      /coder/i,
      /code/i,
      /programming/i,
      /deepseek/i,
      /wizard.*coder/i,
      /starcoder/i,
    ];

    return models
      .filter(model => codingPatterns.some(pattern => pattern.test(model)))
      .sort((a, b) => {
        const sizeA = this.extractModelSize(a);
        const sizeB = this.extractModelSize(b);
        return sizeB - sizeA;
      });
  }

  /**
   * Detect general purpose models
   */
  private detectGeneralPurposeModels(models: string[]): string[] {
    return models
      .filter(
        model =>
          !this.detectFunctionCallingModels([model]).length &&
          !this.detectCodingModels([model]).length
      )
      .sort((a, b) => {
        const sizeA = this.extractModelSize(a);
        const sizeB = this.extractModelSize(b);
        return sizeB - sizeA;
      });
  }

  /**
   * Extract model size from model name (e.g., "7b", "13b")
   */
  private extractModelSize(modelName: string): number {
    const sizeMatch = modelName.match(/(\d+\.?\d*)b/i);
    return sizeMatch ? parseFloat(sizeMatch[1]) : 0;
  }

  /**
   * Set model manually (for slash command support)
   */
  async setModel(modelName: string): Promise<boolean> {
    const availableModels = await this.listModels();

    if (!availableModels.includes(modelName)) {
      logger.error(`Model ${modelName} not available. Available models:`, availableModels);
      return false;
    }

    this.model = modelName;
    logger.info(`🔄 Switched to model: ${modelName}`);

    // Test if model supports function calling
    const functionCallingCapable = this.detectFunctionCallingModels([modelName]).length > 0;
    if (functionCallingCapable) {
      logger.info(`✅ Model ${modelName} supports function calling`);
    } else {
      logger.warn(`⚠️ Model ${modelName} may not support function calling`);
    }

    return true;
  }

  /**
   * Get current model info
   */
  getCurrentModelInfo(): {
    name: string;
    supportsFunctionCalling: boolean;
    type: 'function-calling' | 'coding' | 'general' | 'unknown';
  } {
    const functionCalling = this.detectFunctionCallingModels([this.model]).length > 0;
    const coding = this.detectCodingModels([this.model]).length > 0;

    let type: 'function-calling' | 'coding' | 'general' | 'unknown';
    if (functionCalling) type = 'function-calling';
    else if (coding) type = 'coding';
    else if (this.model !== 'auto-detect') type = 'general';
    else type = 'unknown';

    return {
      name: this.model,
      supportsFunctionCalling: functionCalling,
      type,
    };
  }

  async warmModel(): Promise<void> {
    logger.info('Model is ready', { model: this.model });
  }

  private getGPUConfig(): any {
    return {
      num_gpu: 0,
      num_thread: 4,
      num_batch: 64,
      num_ctx: 8192,
    };
  }
}
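
For orientation, here is a minimal usage sketch of the class above. The import path is hypothetical (the actual export location inside codecrucible-synth may differ), and it assumes an Ollama server is reachable on its default local port.

// Hypothetical import path - adjust to wherever the package exposes OllamaProvider.
import { OllamaProvider } from 'codecrucible-synth/dist/providers/ollama-provider.js';

async function main(): Promise<void> {
  // endpoint, model, and timeout are all optional; the provider falls back to
  // http://localhost:11434, auto-detects a model, and uses a 30s timeout.
  const provider = new OllamaProvider({ timeout: 30000 });

  // checkStatus() probes /api/tags and resolves the 'auto-detect' placeholder
  // to a concrete model via selectOptimalModel().
  const available = await provider.checkStatus();
  if (!available) {
    console.error('Ollama is not reachable on the configured endpoint');
    return;
  }

  console.log('Current model:', provider.getCurrentModelInfo());

  // processRequest() routes to /api/generate (no tools) or /api/chat (with tools).
  const result = await provider.processRequest({
    prompt: 'Explain what an HTTP connection pool is in two sentences.',
    temperature: 0.1,
    maxTokens: 256,
  });

  console.log(result.content);
}

main().catch(err => console.error(err));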