codecrucible-synth
Version:
Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
365 lines (312 loc) • 11.3 kB
text/typescript
/**
* Dynamic Model Router - Automatically detects and routes to best available models
* Supports role-based routing: Auditor (LM Studio) vs Writer (Ollama)
*/
import axios from 'axios';
import { logger } from './logger.js';
import { EventEmitter } from 'events';
/**
 * Describes one model discovered on a local provider, together with the
 * heuristic traits the router uses to filter and score it for a role.
 */
export interface ModelCapabilities {
  /** Local runtime that hosts the model. */
  provider: 'ollama' | 'lm-studio';
  /** Provider-reported model identifier (Ollama `name` / LM Studio `id`). */
  model: string;
  /** Raw size string from the provider, or 'unknown' when not reported. */
  size: string;
  /** Estimated context window in tokens (heuristic from the model name). */
  contextWindow: number;
  /** Machine-readable capability tags, e.g. 'code-generation'. */
  capabilities: string[];
  /** Human-readable strength labels, e.g. 'Code Review'. */
  strengths: string[];
  /** True when the name suggests a code-specialized model. */
  isCodeFocused: boolean;
  /** Heuristic latency class derived from parameter count and provider. */
  responseSpeed: 'fast' | 'medium' | 'slow';
  /** Heuristic output-quality class derived from parameter count. */
  qualityLevel: 'high' | 'medium' | 'low';
}
/**
 * Request describing what kind of model is needed for a task; consumed by
 * DynamicModelRouter.selectModelForRole.
 */
export interface RoleConfiguration {
  /** Routing role: 'auditor' favors fast analysis, 'writer' favors quality generation. */
  role: 'auditor' | 'writer' | 'auto';
  /** Requested provider. NOTE(review): not consulted by the visible selection logic — confirm intent. */
  preferredProvider: 'ollama' | 'lm-studio' | 'auto';
  /** Free-form task label, e.g. 'audit' or 'generate'. */
  taskType: string;
  /** When true, code-focused models are preferred (hard filter if any exist). */
  requiresCodeAnalysis: boolean;
  /** When true, the task produces new content. */
  requiresGeneration: boolean;
}
/**
 * Dynamic Model Router - Automatically detects and routes to best available models.
 * Supports role-based routing: Auditor (LM Studio) vs Writer (Ollama).
 *
 * Emits 'role-changed' with `{ previous, current }` whenever setRole is called.
 */
export class DynamicModelRouter extends EventEmitter {
  /** Results of the most recent provider scan (stale after SCAN_CACHE_TTL). */
  private availableModels: ModelCapabilities[] = [];
  /** Active routing role; 'auto' applies no role-specific filtering or bonuses. */
  private currentRole: 'auditor' | 'writer' | 'auto' = 'auto';
  /** Epoch ms of the last completed scan; 0 means never scanned. */
  private lastScan: number = 0;
  /** How long scan results stay fresh before providers are re-queried. */
  private readonly SCAN_CACHE_TTL = 30000; // 30 seconds

  constructor() {
    super();
    // Multiple subsystems may subscribe to 'role-changed'; raise the default cap.
    this.setMaxListeners(20);
  }

  /**
   * Dynamically detect all available models across providers.
   *
   * Results are cached for SCAN_CACHE_TTL ms. Both providers are scanned in
   * parallel (they are independent local HTTP endpoints), and a failure in
   * one never prevents results from the other.
   *
   * @returns All models found, Ollama first, then LM Studio.
   */
  async detectAvailableModels(): Promise<ModelCapabilities[]> {
    // Serve cached results while fresh to avoid hammering local providers.
    if (Date.now() - this.lastScan < this.SCAN_CACHE_TTL && this.availableModels.length > 0) {
      return this.availableModels;
    }
    const models: ModelCapabilities[] = [];
    // The two scans are independent; run them concurrently instead of serially.
    const [ollamaResult, lmStudioResult] = await Promise.allSettled([
      this.scanOllamaModels(),
      this.scanLMStudioModels(),
    ]);
    if (ollamaResult.status === 'fulfilled') {
      models.push(...ollamaResult.value);
      logger.info(`Found ${ollamaResult.value.length} Ollama models`);
    } else {
      const error: unknown = ollamaResult.reason;
      logger.warn(
        'Failed to scan Ollama models:',
        error instanceof Error ? error.message : String(error)
      );
    }
    if (lmStudioResult.status === 'fulfilled') {
      models.push(...lmStudioResult.value);
      logger.info(`Found ${lmStudioResult.value.length} LM Studio models`);
    } else {
      const error: unknown = lmStudioResult.reason;
      logger.warn(
        'Failed to scan LM Studio models:',
        error instanceof Error ? error.message : String(error)
      );
    }
    this.availableModels = models;
    this.lastScan = Date.now();
    logger.info(`Found ${models.length} total models`);
    return models;
  }

  /**
   * Scan Ollama for available models via its /api/tags endpoint.
   * Rejects when Ollama is unreachable; the caller isolates the failure.
   */
  private async scanOllamaModels(): Promise<ModelCapabilities[]> {
    // TODO(review): endpoint is hard-coded; consider making it configurable.
    const response = await axios.get('http://localhost:11434/api/tags', { timeout: 5000 });
    const models: ModelCapabilities[] = [];
    for (const model of response.data.models || []) {
      const capabilities = this.analyzeModelCapabilities(model.name, 'ollama');
      models.push({
        provider: 'ollama',
        model: model.name,
        size: model.size || 'unknown',
        contextWindow: this.estimateContextWindow(model.name),
        capabilities: capabilities.capabilities,
        strengths: capabilities.strengths,
        isCodeFocused: capabilities.isCodeFocused,
        responseSpeed: capabilities.responseSpeed,
        qualityLevel: capabilities.qualityLevel,
      });
    }
    return models;
  }

  /**
   * Scan LM Studio for available models via its OpenAI-compatible /v1/models
   * endpoint. Never rejects: LM Studio is optional, so an unreachable server
   * is logged at debug level and treated as "no models".
   */
  private async scanLMStudioModels(): Promise<ModelCapabilities[]> {
    try {
      // TODO(review): endpoint is hard-coded; consider making it configurable.
      const response = await axios.get('http://localhost:1234/v1/models', { timeout: 5000 });
      const models: ModelCapabilities[] = [];
      for (const model of response.data.data || []) {
        const capabilities = this.analyzeModelCapabilities(model.id, 'lm-studio');
        models.push({
          provider: 'lm-studio',
          model: model.id,
          size: 'unknown', // LM Studio's /v1/models does not report a size
          contextWindow: this.estimateContextWindow(model.id),
          capabilities: capabilities.capabilities,
          strengths: capabilities.strengths,
          isCodeFocused: capabilities.isCodeFocused,
          responseSpeed: capabilities.responseSpeed,
          qualityLevel: capabilities.qualityLevel,
        });
      }
      return models;
    } catch (error) {
      logger.debug(
        'LM Studio API not available:',
        error instanceof Error ? error.message : String(error)
      );
      return [];
    }
  }

  /**
   * Analyze model capabilities based on name patterns.
   *
   * Purely heuristic: capability tags, speed, and quality are inferred from
   * the (lower-cased) model name, then adjusted per provider.
   */
  private analyzeModelCapabilities(
    modelName: string,
    provider: string
  ): {
    capabilities: string[];
    strengths: string[];
    isCodeFocused: boolean;
    responseSpeed: 'fast' | 'medium' | 'slow';
    qualityLevel: 'high' | 'medium' | 'low';
  } {
    const name = modelName.toLowerCase();
    const capabilities: string[] = [];
    const strengths: string[] = [];
    let isCodeFocused = false;
    let responseSpeed: 'fast' | 'medium' | 'slow' = 'medium';
    let qualityLevel: 'high' | 'medium' | 'low' = 'medium';
    // Code-focused models
    if (name.includes('coder') || name.includes('code') || name.includes('programming')) {
      isCodeFocused = true;
      capabilities.push('code-generation', 'code-analysis', 'debugging');
      strengths.push('Programming', 'Code Review', 'Technical Analysis');
    }
    // Parameter-count heuristic. Parse the whole number so "13b" is not
    // misread as "3b" and "27b" is not misread as "7b" — the previous
    // substring checks (`name.includes('3b')`) had exactly that bug.
    const sizeMatch = /(\d+(?:\.\d+)?)b\b/.exec(name);
    if (sizeMatch) {
      const billions = parseFloat(sizeMatch[1]);
      if (billions <= 7) {
        responseSpeed = 'fast';
        qualityLevel = 'medium';
      } else if (billions <= 14) {
        responseSpeed = 'medium';
        qualityLevel = 'high';
      } else if (billions >= 20) {
        responseSpeed = 'slow';
        qualityLevel = 'high';
      }
      // 15–19b: keep the medium/medium defaults, as before.
    }
    // Specialized capabilities
    if (name.includes('instruct') || name.includes('chat')) {
      capabilities.push('conversation', 'instruction-following');
      strengths.push('Interactive Communication');
    }
    if (name.includes('qwen') || name.includes('gemma') || name.includes('llama')) {
      capabilities.push('general-reasoning', 'text-generation');
      strengths.push('General Intelligence');
    }
    // Provider-specific optimizations
    if (provider === 'lm-studio') {
      strengths.push('Fast Inference', 'Windows Optimized');
      if (responseSpeed === 'medium') responseSpeed = 'fast';
    }
    if (provider === 'ollama') {
      strengths.push('Quality Reasoning', 'Model Management');
    }
    return { capabilities, strengths, isCodeFocused, responseSpeed, qualityLevel };
  }

  /**
   * Estimate context window (in tokens) based on the model name.
   *
   * Parses an explicit "<N>k" marker when present; otherwise falls back to
   * per-family defaults, then a conservative 4096.
   */
  private estimateContextWindow(modelName: string): number {
    const name = modelName.toLowerCase();
    // Match the full number: the previous substring checks classified
    // "128k" as "8k" (8192). "<N>k" consistently means N * 1024 tokens.
    const ctxMatch = /(\d+)k\b/.exec(name);
    if (ctxMatch) {
      return parseInt(ctxMatch[1], 10) * 1024;
    }
    // Default estimates based on model type
    if (name.includes('qwen2.5')) return 32768;
    if (name.includes('gemma')) return 8192;
    if (name.includes('llama3')) return 8192;
    return 4096; // Conservative default
  }

  /**
   * Select optimal model based on role and task requirements.
   *
   * Filters the detected models by role, then by code focus when required,
   * scores the survivors, and returns the highest-scoring one.
   *
   * @returns The best match, or null when nothing suitable is available.
   */
  async selectModelForRole(config: RoleConfiguration): Promise<ModelCapabilities | null> {
    await this.detectAvailableModels();
    let filteredModels = this.availableModels;
    // NOTE(review): config.preferredProvider is currently not consulted here.
    // Role-based filtering
    if (config.role === 'auditor') {
      // Auditor prefers LM Studio for fast analysis
      filteredModels = filteredModels.filter(
        m => m.provider === 'lm-studio' || (m.provider === 'ollama' && m.isCodeFocused)
      );
    } else if (config.role === 'writer') {
      // Writer prefers Ollama for quality generation
      filteredModels = filteredModels.filter(
        m => m.provider === 'ollama' || (m.provider === 'lm-studio' && m.qualityLevel === 'high')
      );
    }
    // Prefer code-focused models for technical tasks, but only when at least
    // one exists — never filter down to an empty set on this criterion.
    if (config.requiresCodeAnalysis) {
      const codeFocusedModels = filteredModels.filter(m => m.isCodeFocused);
      if (codeFocusedModels.length > 0) {
        filteredModels = codeFocusedModels;
      }
    }
    if (filteredModels.length === 0) {
      logger.warn(`No suitable models found for role: ${config.role}`);
      return null;
    }
    // Score and rank models
    const scoredModels = filteredModels.map(model => ({
      model,
      score: this.scoreModelForRole(model, config),
    }));
    // Sort by score (highest first); Array.sort is stable, so ties keep
    // detection order (Ollama before LM Studio).
    scoredModels.sort((a, b) => b.score - a.score);
    const selected = scoredModels[0].model;
    logger.info(`Selected model for ${config.role}: ${selected.model} (${selected.provider})`);
    return selected;
  }

  /**
   * Score model suitability for a specific role.
   *
   * Additive scoring: provider/role match (+30), code focus (+25),
   * speed or quality per role (+10/+20), context window (+5..+15),
   * and +5 per matched relevant capability.
   */
  private scoreModelForRole(model: ModelCapabilities, config: RoleConfiguration): number {
    let score = 0;
    // Base score for provider preference
    if (config.role === 'auditor' && model.provider === 'lm-studio') score += 30;
    if (config.role === 'writer' && model.provider === 'ollama') score += 30;
    // Code focus bonus
    if (config.requiresCodeAnalysis && model.isCodeFocused) score += 25;
    // Quality vs Speed trade-offs
    if (config.role === 'auditor') {
      // Auditor prefers speed
      if (model.responseSpeed === 'fast') score += 20;
      if (model.responseSpeed === 'medium') score += 10;
    } else if (config.role === 'writer') {
      // Writer prefers quality
      if (model.qualityLevel === 'high') score += 20;
      if (model.qualityLevel === 'medium') score += 10;
    }
    // Context window bonus
    if (model.contextWindow >= 32768) score += 15;
    else if (model.contextWindow >= 16384) score += 10;
    else if (model.contextWindow >= 8192) score += 5;
    // Capability matching
    const relevantCapabilities = ['code-analysis', 'code-generation', 'general-reasoning'];
    const matchedCapabilities = model.capabilities.filter(cap =>
      relevantCapabilities.includes(cap)
    ).length;
    score += matchedCapabilities * 5;
    return score;
  }

  /**
   * Switch current role and notify subscribers via the 'role-changed' event.
   */
  setRole(role: 'auditor' | 'writer' | 'auto'): void {
    const previousRole = this.currentRole;
    this.currentRole = role;
    logger.info(`Role switched: ${previousRole} -> ${role}`);
    this.emit('role-changed', { previous: previousRole, current: role });
  }

  /**
   * Get current role.
   */
  getCurrentRole(): 'auditor' | 'writer' | 'auto' {
    return this.currentRole;
  }

  /**
   * Get models grouped by provider (triggers a scan if the cache is stale).
   */
  async getModelsByProvider(): Promise<{
    ollama: ModelCapabilities[];
    lmStudio: ModelCapabilities[];
  }> {
    await this.detectAvailableModels();
    return {
      ollama: this.availableModels.filter(m => m.provider === 'ollama'),
      lmStudio: this.availableModels.filter(m => m.provider === 'lm-studio'),
    };
  }

  /**
   * Get best model for current role, deriving analysis/generation needs
   * from keywords in the task type string.
   */
  async getBestModelForCurrentRole(
    taskType: string = 'general'
  ): Promise<ModelCapabilities | null> {
    const config: RoleConfiguration = {
      role: this.currentRole,
      preferredProvider: 'auto',
      taskType,
      requiresCodeAnalysis: taskType.includes('audit') || taskType.includes('analyze'),
      requiresGeneration: taskType.includes('write') || taskType.includes('generate'),
    };
    return this.selectModelForRole(config);
  }
}