/**
 * codecrucible-synth: LM Studio provider
 *
 * Part of a production-ready AI development platform with multi-voice
 * synthesis, Smithery MCP integration, enterprise security, and
 * zero-timeout reliability.
 */
import axios, { AxiosInstance } from 'axios';
import http from 'http';
import https from 'https';
import { logger } from '../core/logger.js';
import { getErrorMessage } from '../utils/error-utils.js';
export interface LMStudioConfig {
endpoint?: string;
model?: string;
timeout?: number;
apiKey?: string;
}
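// Illustrative request/response shapes, inferred from how generate()
// consumes its argument and what it returns. The class itself keeps the
// original `any` signatures; these exported types are documentation only.
export interface LMStudioGenerateRequest {
  prompt?: string;
  text?: string;
  content?: string;
  tools?: unknown[];
  temperature?: number;
  maxTokens?: number;
  max_tokens?: number;
  abortSignal?: AbortSignal;
}
export interface LMStudioGenerateResponse {
  content: string;
  model: string;
  provider: 'lm-studio';
  metadata: { tokens: number; latency: number; quality: number };
  usage: { totalTokens: number; promptTokens: number; completionTokens: number };
  finished: boolean;
}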
export class LMStudioProvider {
private httpClient: AxiosInstance;
private config: LMStudioConfig;
private model: string;
private isAvailable: boolean = false;
constructor(config: LMStudioConfig) {
this.config = {
endpoint: config.endpoint || 'http://localhost:1234',
model: config.model || 'auto',
timeout: config.timeout || 30000,
apiKey: config.apiKey, // Keep the API key so the stored config is complete
};
this.model = this.config.model || 'auto';
// Create HTTP agents with connection pooling for performance optimization
const httpAgent = new http.Agent({
keepAlive: true,
maxSockets: 5, // LM Studio typically handles fewer concurrent requests
maxFreeSockets: 3, // Keep 3 idle connections
timeout: 5000, // Socket timeout
});
const httpsAgent = new https.Agent({
keepAlive: true,
maxSockets: 5,
maxFreeSockets: 3,
timeout: 5000,
});
this.httpClient = axios.create({
baseURL: this.config.endpoint,
timeout: this.config.timeout,
httpAgent, // Connection pooling for HTTP
httpsAgent, // Connection pooling for HTTPS
headers: {
'Content-Type': 'application/json',
Connection: 'keep-alive', // Explicitly request keep-alive
...(config.apiKey && { Authorization: `Bearer ${config.apiKey}` }),
},
});
logger.debug('LM Studio HTTP connection pooling configured', {
maxSockets: 5,
keepAlive: true,
});
}
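/** Verifies the server is reachable before delegating to generate(). */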
async processRequest(request: any, _context?: any): Promise<any> {
// Check status first
if (!this.isAvailable) {
const available = await this.checkStatus();
if (!available) {
throw new Error('LM Studio service is not available');
}
}
return this.generate(request);
}
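/**
 * Sends a chat completion request to LM Studio's OpenAI-compatible
 * /v1/chat/completions endpoint and normalizes the result.
 */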
async generate(request: any): Promise<any> {
// Use external AbortSignal if provided, otherwise create our own
const externalAbortSignal = request.abortSignal;
const abortController = externalAbortSignal ? undefined : new AbortController();
const effectiveAbortSignal = externalAbortSignal || abortController?.signal;
let timeoutId: NodeJS.Timeout | undefined;
if (!externalAbortSignal && abortController) {
timeoutId = setTimeout(() => {
logger.debug('LM Studio request timeout, aborting');
abortController.abort();
}, this.config.timeout || 30000);
}
try {
// Get available models first if model is 'auto'
if (this.model === 'auto') {
await this.selectOptimalModel();
}
const response = await this.httpClient.post(
'/v1/chat/completions',
{
model: this.model,
messages: [
{
role: 'system',
content: `You are the AUDITOR component of CodeCrucible Synth's dual-agent system. Your role is to review, validate, and enhance responses generated by the Ollama model.
AUDITOR RESPONSIBILITIES:
1. **Quality Control**: Review generated code for bugs, security issues, performance problems
2. **Best Practices**: Ensure code follows industry standards and conventions
3. **Completeness Check**: Verify the response fully addresses the user's request
4. **Tool Usage Validation**: Confirm proper use of filesystem tools and codebase interaction
5. **Enhancement**: Suggest improvements, missing edge cases, or better approaches
AUDIT PROCESS:
- Analyze the primary response from Ollama
- Use tools to verify code correctness when needed
- Check for security vulnerabilities and performance issues
- Validate that filesystem operations were used appropriately
- Provide constructive feedback and improvements
AUDIT OUTPUT FORMAT:
- Identify what was done well
- Highlight any issues found
- Suggest specific improvements
- Validate tool usage was appropriate for the task
Your audit should be thorough but concise, focusing on actionable feedback to enhance the final response quality.`,
},
{
role: 'user',
content: request.prompt || request.text || request.content,
},
],
tools: request.tools || [],
temperature: request.temperature || 0.7,
max_tokens: request.maxTokens || request.max_tokens || 16384,
stream: false,
},
{
signal: effectiveAbortSignal,
timeout: this.config.timeout || 30000,
}
);
const choice = response.data.choices?.[0];
if (!choice) {
throw new Error('No response choices returned from LM Studio');
}
if (timeoutId) clearTimeout(timeoutId);
return {
content: choice.message?.content || choice.text || '',
model: this.model,
provider: 'lm-studio',
metadata: {
tokens: response.data.usage?.total_tokens || 0,
latency: Date.now(), // Timestamp only; the caller computes the actual latency
quality: 0.85, // LM Studio generally provides good quality
},
usage: {
totalTokens: response.data.usage?.total_tokens || 0,
promptTokens: response.data.usage?.prompt_tokens || 0,
completionTokens: response.data.usage?.completion_tokens || 0,
},
finished: choice.finish_reason === 'stop',
};
} catch (error: unknown) {
if (timeoutId) clearTimeout(timeoutId);
// Axios reports aborted requests as CanceledError, so check both forms
if (axios.isCancel(error) || (error as any)?.name === 'AbortError') {
throw new Error('LM Studio API request timed out');
}
if (error instanceof Error && 'code' in error && error.code === 'ECONNREFUSED') {
this.isAvailable = false;
throw new Error(
'LM Studio server is not running. Please start LM Studio and enable the local server.'
);
}
logger.error('LM Studio generation failed:', (error as Error).message);
throw new Error(`LM Studio generation failed: ${(error as Error).message}`);
}
}
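/** Probes /v1/models to check availability and auto-select a model when needed. */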
async checkStatus(): Promise<boolean> {
try {
const response = await this.httpClient.get('/v1/models', { timeout: 5000 });
this.isAvailable = response.status === 200;
if (this.isAvailable && response.data.data) {
const models = response.data.data.map((m: { id: string }) => m.id);
logger.info(`LM Studio available with ${models.length} models:`, models.slice(0, 3));
// Auto-select first available model if not specified
if (this.model === 'auto' && models.length > 0) {
this.model = models[0];
logger.info(`Auto-selected LM Studio model: ${this.model}`);
}
}
return this.isAvailable;
} catch (error: unknown) {
this.isAvailable = false;
if ((error as any)?.code !== 'ECONNREFUSED') {
logger.warn('LM Studio status check failed:', getErrorMessage(error));
}
return false;
}
}
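/** Lists the ids of the models currently loaded in LM Studio. */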
async listModels(): Promise<string[]> {
try {
const response = await this.httpClient.get('/v1/models');
if (response.data.data) {
return response.data.data.map((m: { id: string }) => m.id);
}
return [];
} catch (error) {
logger.error('Failed to list LM Studio models:', error);
return [];
}
}
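/**
 * Prefers small, fast models from the loaded set and falls back to the
 * first available model.
 */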
private async selectOptimalModel(): Promise<void> {
try {
const models = await this.listModels();
if (models.length === 0) {
throw new Error('No models available in LM Studio');
}
// Prefer fast, efficient models for LM Studio
const preferredModels = ['tinyllama', 'phi-2', 'codellama-7b', 'mistral-7b', 'zephyr-7b'];
let selectedModel = models[0]; // fallback
for (const preferred of preferredModels) {
const match = models.find(m => m.toLowerCase().includes(preferred));
if (match) {
selectedModel = match;
break;
}
}
this.model = selectedModel;
logger.info(`Selected optimal LM Studio model: ${this.model}`);
} catch (error) {
logger.warn('Could not select optimal model:', getErrorMessage(error));
}
}
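/** Convenience alias for checkStatus(). */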
async healthCheck(): Promise<boolean> {
return this.checkStatus();
}
supportsModel(_modelName: string): boolean {
// LM Studio supports any model loaded in it
return true;
}
getModelName(): string {
return this.model;
}
}
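// Minimal usage sketch (illustrative only, not part of the original file):
// assumes LM Studio's local server is running on the default port with at
// least one model loaded.
export async function demoLMStudioProvider(): Promise<void> {
  const provider = new LMStudioProvider({ endpoint: 'http://localhost:1234' });

  // healthCheck() probes /v1/models and auto-selects a model when set to 'auto'.
  if (!(await provider.healthCheck())) {
    console.error('LM Studio is not reachable; start the local server first.');
    return;
  }

  const result = await provider.processRequest({
    prompt: 'Audit this function for correctness and style.',
    temperature: 0.2,
    maxTokens: 1024,
  });
  console.log(`[${result.model}]`, result.content);
}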