codecrucible-synth

Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability

import axios, { AxiosInstance } from 'axios';
import http from 'http';
import https from 'https';
import { logger } from '../core/logger.js';
import { getErrorMessage } from '../utils/error-utils.js';

export interface LMStudioConfig {
  endpoint?: string;
  model?: string;
  timeout?: number;
  apiKey?: string;
}
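
// --- Illustrative aside (not part of the original file) ---------------------
// The keep-alive pooling pattern the constructor below relies on, shown in
// isolation. It reuses the http/axios imports above; port 1234 is LM Studio's
// default local server address, and the socket limits mirror the provider's
// defaults (assumptions, not requirements of the LM Studio API).
async function poolingSketch(): Promise<void> {
  const agent = new http.Agent({
    keepAlive: true,   // reuse TCP sockets across requests
    maxSockets: 5,     // cap concurrent connections to the local server
    maxFreeSockets: 3, // keep a few idle sockets warm
  });
  const client = axios.create({ baseURL: 'http://localhost:1234', httpAgent: agent });

  // Sequential calls reuse a pooled socket instead of opening a fresh TCP
  // connection per request, avoiding repeated handshake latency.
  await client.get('/v1/models');
  await client.get('/v1/models');
  agent.destroy(); // release pooled sockets when finished
}
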
export class LMStudioProvider {
  private httpClient: AxiosInstance;
  private config: LMStudioConfig;
  private model: string;
  private isAvailable: boolean = false;

  constructor(config: LMStudioConfig) {
    this.config = {
      endpoint: config.endpoint || 'http://localhost:1234',
      model: config.model || 'auto',
      timeout: config.timeout || 30000,
    };
    this.model = this.config.model || 'auto';

    // Create HTTP agents with connection pooling for performance optimization
    const httpAgent = new http.Agent({
      keepAlive: true,
      maxSockets: 5, // LM Studio typically handles fewer concurrent requests
      maxFreeSockets: 3, // Keep 3 idle connections
      timeout: 5000, // Socket timeout
    });
    const httpsAgent = new https.Agent({
      keepAlive: true,
      maxSockets: 5,
      maxFreeSockets: 3,
      timeout: 5000,
    });

    this.httpClient = axios.create({
      baseURL: this.config.endpoint,
      timeout: this.config.timeout,
      httpAgent, // Connection pooling for HTTP
      httpsAgent, // Connection pooling for HTTPS
      headers: {
        'Content-Type': 'application/json',
        Connection: 'keep-alive', // Explicitly request keep-alive
        ...(config.apiKey && { Authorization: `Bearer ${config.apiKey}` }),
      },
    });

    logger.debug('LM Studio HTTP connection pooling configured', {
      maxSockets: 5,
      keepAlive: true,
    });
  }

  async processRequest(request: any, _context?: any): Promise<any> {
    // Check status first
    if (!this.isAvailable) {
      const available = await this.checkStatus();
      if (!available) {
        throw new Error('LM Studio service is not available');
      }
    }

    return this.generate(request);
  }

  async generate(request: any): Promise<any> {
    // Use external AbortSignal if provided, otherwise create our own
    const externalAbortSignal = request.abortSignal;
    const abortController = externalAbortSignal ? undefined : new AbortController();
    const effectiveAbortSignal = externalAbortSignal || abortController?.signal;

    let timeoutId: NodeJS.Timeout | undefined;
    if (!externalAbortSignal && abortController) {
      timeoutId = setTimeout(() => {
        logger.debug('LM Studio request timeout, aborting');
        abortController.abort();
      }, this.config.timeout || 30000);
    }

    try {
      // Get available models first if model is 'auto'
      if (this.model === 'auto') {
        await this.selectOptimalModel();
      }

      const response = await this.httpClient.post(
        '/v1/chat/completions',
        {
          model: this.model,
          messages: [
            {
              role: 'system',
              content: `You are the AUDITOR component of CodeCrucible Synth's dual-agent system. Your role is to review, validate, and enhance responses generated by the Ollama model.

AUDITOR RESPONSIBILITIES:
1. **Quality Control**: Review generated code for bugs, security issues, performance problems
2. **Best Practices**: Ensure code follows industry standards and conventions
3. **Completeness Check**: Verify the response fully addresses the user's request
4. **Tool Usage Validation**: Confirm proper use of filesystem tools and codebase interaction
5. **Enhancement**: Suggest improvements, missing edge cases, or better approaches

AUDIT PROCESS:
- Analyze the primary response from Ollama
- Use tools to verify code correctness when needed
- Check for security vulnerabilities and performance issues
- Validate that filesystem operations were used appropriately
- Provide constructive feedback and improvements

AUDIT OUTPUT FORMAT:
- Identify what was done well
- Highlight any issues found
- Suggest specific improvements
- Validate tool usage was appropriate for the task

Your audit should be thorough but concise, focusing on actionable feedback to enhance the final response quality.`,
            },
            {
              role: 'user',
              content: request.prompt || request.text || request.content,
            },
          ],
          tools: request.tools || [],
          temperature: request.temperature || 0.7,
          max_tokens: request.maxTokens || request.max_tokens || 16384,
          stream: false,
        },
        {
          signal: effectiveAbortSignal,
          timeout: this.config.timeout || 30000,
        }
      );

      const choice = response.data.choices?.[0];
      if (!choice) {
        throw new Error('No response choices returned from LM Studio');
      }

      if (timeoutId) clearTimeout(timeoutId);

      return {
        content: choice.message?.content || choice.text || '',
        model: this.model,
        provider: 'lm-studio',
        metadata: {
          tokens: response.data.usage?.total_tokens || 0,
          latency: Date.now(), // Will be calculated by caller
          quality: 0.85, // LM Studio generally provides good quality
        },
        usage: {
          totalTokens: response.data.usage?.total_tokens || 0,
          promptTokens: response.data.usage?.prompt_tokens || 0,
          completionTokens: response.data.usage?.completion_tokens || 0,
        },
        finished: choice.finish_reason === 'stop',
      };
    } catch (error: unknown) {
      if (timeoutId) clearTimeout(timeoutId);

      // Axios surfaces aborted requests as a CanceledError (axios.isCancel),
      // not a DOM-style AbortError, so check both.
      if (axios.isCancel(error) || (error as any)?.name === 'AbortError') {
        throw new Error('LM Studio API request timed out');
      }
      if (error instanceof Error && 'code' in error && error.code === 'ECONNREFUSED') {
        this.isAvailable = false;
        throw new Error(
          'LM Studio server is not running. Please start LM Studio and enable the local server.'
        );
      }

      logger.error('LM Studio generation failed:', (error as Error).message);
      throw new Error(`LM Studio generation failed: ${(error as Error).message}`);
    }
  }
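
  /*
   * Illustrative aside (not part of the original file): driving generate()
   * with an external AbortSignal. The method above only arms its own timeout
   * when no signal is supplied. AbortSignal.timeout() needs Node 17.3+;
   * older runtimes can pair an AbortController with setTimeout instead.
   *
   *   const signal = AbortSignal.timeout(5_000); // 5 s budget (illustrative)
   *   const result = await provider.generate({
   *     prompt: 'Review this function for bugs.',
   *     abortSignal: signal, // generate() skips its internal timeout
   *   });
   *
   * An aborted request surfaces as 'LM Studio API request timed out'.
   */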
  async checkStatus(): Promise<boolean> {
    try {
      const response = await this.httpClient.get('/v1/models', { timeout: 5000 });
      this.isAvailable = response.status === 200;

      if (this.isAvailable && response.data.data) {
        const models = response.data.data.map((m: Record<string, unknown>) => m.id);
        logger.info(`LM Studio available with ${models.length} models:`, models.slice(0, 3));

        // Auto-select first available model if not specified
        if (this.model === 'auto' && models.length > 0) {
          this.model = models[0];
          logger.info(`Auto-selected LM Studio model: ${this.model}`);
        }
      }

      return this.isAvailable;
    } catch (error: unknown) {
      this.isAvailable = false;
      if ((error as any)?.code !== 'ECONNREFUSED') {
        logger.warn('LM Studio status check failed:', getErrorMessage(error));
      }
      return false;
    }
  }

  async listModels(): Promise<string[]> {
    try {
      const response = await this.httpClient.get('/v1/models');
      if (response.data.data) {
        return response.data.data.map((m: Record<string, unknown>) => m.id);
      }
      return [];
    } catch (error) {
      logger.error('Failed to list LM Studio models:', error);
      return [];
    }
  }

  private async selectOptimalModel(): Promise<void> {
    try {
      const models = await this.listModels();
      if (models.length === 0) {
        throw new Error('No models available in LM Studio');
      }

      // Prefer fast, efficient models for LM Studio
      const preferredModels = ['tinyllama', 'phi-2', 'codellama-7b', 'mistral-7b', 'zephyr-7b'];

      let selectedModel = models[0]; // fallback
      for (const preferred of preferredModels) {
        const match = models.find(m => m.toLowerCase().includes(preferred));
        if (match) {
          selectedModel = match;
          break;
        }
      }

      this.model = selectedModel;
      logger.info(`Selected optimal LM Studio model: ${this.model}`);
    } catch (error) {
      // No model could be selected here; this.model stays 'auto' and the
      // request will surface the underlying error.
      logger.warn('Could not select optimal model, using first available');
    }
  }

  async healthCheck(): Promise<boolean> {
    return this.checkStatus();
  }

  supportsModel(_modelName: string): boolean {
    // LM Studio supports any model loaded in it
    return true;
  }

  getModelName(): string {
    return this.model;
  }
}
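
Below the class, a short end-to-end usage sketch may help. It is not part of the package: the endpoint is LM Studio's default local address, the prompt is purely illustrative, and LMStudioProvider is the class defined above.

// End-to-end usage sketch (assumptions: LM Studio running locally with at
// least one model loaded; prompt text is illustrative only).
async function lmStudioDemo(): Promise<void> {
  const provider = new LMStudioProvider({ endpoint: 'http://localhost:1234', model: 'auto' });

  if (!(await provider.healthCheck())) {
    console.error('LM Studio is not running; start it and enable the local server.');
    return;
  }

  console.log('Models:', await provider.listModels());

  const result = await provider.processRequest({
    prompt: 'Explain what an HTTP agent does in Node.js.',
  });
  console.log(`[${result.model}]`, result.content);
  console.log('Tokens used:', result.usage.totalTokens);
}

lmStudioDemo().catch(err => console.error((err as Error).message));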