codecrucible-synth
Version:
Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
621 lines (530 loc) • 20.2 kB
text/typescript
/**
* Hardware-Aware Model Selector
* Automatically switches models based on hardware capabilities and performance issues
*/
import { Logger } from '../logger.js';
import {
IntelligentModelDetector,
ModelInfo,
OptimalConfiguration,
} from '../model-management/intelligent-model-detector.js';
import { EventEmitter } from 'events';
import * as os from 'os';
/**
 * Snapshot of the host machine's resources, used to decide which models can
 * run and how confident a selection is.
 */
export interface HardwareProfile {
  /** Operating-system identifier (populated from `os.platform()`). */
  platform: string;

  /** CPU architecture identifier (populated from `os.arch()`). */
  arch: string;

  /** Number of logical CPUs (populated from `os.cpus().length`). */
  cpuCores: number;

  /** Installed system memory, in GB (bytes / 1024^3). */
  totalMemoryGB: number;

  /** Currently free system memory, in GB (bytes / 1024^3). */
  availableMemoryGB: number;

  /** Whether a GPU is considered available. */
  hasGPU: boolean;

  /** GPU memory in GB; omitted when no GPU is detected. */
  gpuMemoryGB?: number;
}
/**
 * Rolling health figures tracked per model and compared against the
 * selector's switching thresholds.
 */
export interface PerformanceMetrics {
  /** Latency of the most recent request, in milliseconds. */
  responseTime: number;

  /** Generation throughput, in tokens per second. */
  tokensPerSecond: number;

  /** Memory utilisation as a fraction in the range 0..1. */
  memoryUsage: number;

  /** CPU utilisation — presumably a 0..1 fraction like `memoryUsage`; confirm with callers. */
  cpuUsage: number;

  /** Error rate as a fraction in the range 0..1. */
  errorRate: number;

  /** Count of back-to-back failed reports since the last success. */
  consecutiveErrors: number;

  /** Epoch timestamp (ms, as produced by `Date.now()`) of the last success. */
  lastSuccessTime: number;
}
/**
 * Payload describing a change of the active model: why it happened, which
 * models were involved, and the metrics and hardware state at that moment.
 */
export interface ModelSwitchEvent {
  /** What triggered the switch. */
  reason:
    | 'hardware_constraint'
    | 'performance_degradation'
    | 'error_threshold'
    | 'timeout'
    | 'oom';

  /** Name of the model that was active before the switch. */
  fromModel: string;

  /** Name of the model that is active after the switch. */
  toModel: string;

  /** Metrics that led to (or accompanied) the switch. */
  metrics: PerformanceMetrics;

  /** Hardware state when the switch was made. */
  hardwareProfile: HardwareProfile;

  /** When the switch happened. */
  timestamp: Date;
}
/**
 * Picks models that fit the host's memory and CPU, and moves to a fallback
 * model when the active one times out, errors repeatedly, or system memory
 * runs low.
 *
 * Emits a `modelSwitch` event carrying a `ModelSwitchEvent` whenever the
 * active model changes. The constructor starts a 30-second hardware poll;
 * call `destroy()` to stop it.
 */
export class HardwareAwareModelSelector extends EventEmitter {
  /** Bytes in one GB (1024^3); converts `os` byte counts to GB. */
  private static readonly BYTES_PER_GB = 1024 * 1024 * 1024;

  private logger: Logger;
  private modelDetector: IntelligentModelDetector;
  private hardwareProfile: HardwareProfile;
  private performanceMetrics: Map<string, PerformanceMetrics> = new Map();
  private currentModel: string | null = null;
  private fallbackModels: ModelInfo[] = [];
  private switchingInProgress = false;
  private monitoringInterval: NodeJS.Timeout | null = null;

  // Performance thresholds for automatic switching
  private readonly thresholds = {
    maxResponseTime: 30000, // 30 seconds
    maxMemoryUsage: 0.85, // 85% of available memory
    maxErrorRate: 0.3, // 30% error rate
    maxConsecutiveErrors: 3,
    minTokensPerSecond: 0.5,
    memoryWarningThreshold: 0.75, // 75% memory usage warning
  };

  constructor() {
    super();
    this.logger = new Logger('HardwareAwareModelSelector');
    this.modelDetector = new IntelligentModelDetector();
    this.hardwareProfile = this.assessHardware();
    this.startPerformanceMonitoring();
  }

  /**
   * Assess current hardware capabilities.
   *
   * @returns A profile built from `os` memory/CPU figures plus the simplified
   *   GPU detection below.
   */
  private assessHardware(): HardwareProfile {
    const totalMemoryGB = os.totalmem() / HardwareAwareModelSelector.BYTES_PER_GB;
    const freeMemoryGB = os.freemem() / HardwareAwareModelSelector.BYTES_PER_GB;

    return {
      totalMemoryGB,
      availableMemoryGB: freeMemoryGB,
      cpuCores: os.cpus().length,
      hasGPU: this.detectGPU(),
      gpuMemoryGB: this.estimateGPUMemory(),
      platform: os.platform(),
      arch: os.arch(),
    };
  }

  /**
   * Detect if a GPU is available (simplified).
   *
   * Only checks whether `CUDA_VISIBLE_DEVICES` or `HIP_VISIBLE_DEVICES` is set
   * in the environment; no device is actually queried.
   */
  private detectGPU(): boolean {
    return (
      process.env.CUDA_VISIBLE_DEVICES !== undefined ||
      process.env.HIP_VISIBLE_DEVICES !== undefined
    );
  }

  /**
   * Estimate GPU memory (simplified).
   *
   * @returns `undefined` when no GPU is detected, otherwise a fixed 8 GB
   *   placeholder (the real GPU memory is not queried).
   */
  private estimateGPUMemory(): number | undefined {
    if (!this.detectGPU()) return undefined;
    return 8; // Default estimate: 8GB
  }

  /**
   * Get the optimal model configuration for the current hardware.
   *
   * Scans the available models, drops those that do not fit, ranks the rest,
   * and records up to three fallbacks that are no larger than the primary.
   *
   * @returns Writer/auditor model choices and a confidence value.
   * @throws Error when no scanned model fits the current hardware.
   */
  async getOptimalModelForHardware(): Promise<OptimalConfiguration> {
    // One scan is enough; a second call only repeated the work.
    const availableModels = await this.modelDetector.scanAvailableModels();

    this.logger.info('Selecting models based on hardware profile:', {
      memory: `${this.hardwareProfile.totalMemoryGB.toFixed(1)}GB total, ${this.hardwareProfile.availableMemoryGB.toFixed(1)}GB available`,
      cpu: `${this.hardwareProfile.cpuCores} cores`,
      gpu: this.hardwareProfile.hasGPU ? `${this.hardwareProfile.gpuMemoryGB}GB` : 'none',
    });

    // Filter models based on hardware constraints
    const suitableModels = this.filterModelsByHardware(availableModels);
    if (suitableModels.length === 0) {
      throw new Error('No models compatible with current hardware configuration');
    }

    // The comparator already ranks qwen2.5-coder variants first (best score
    // among them leading), so no extra move-to-front step is needed.
    const sortedModels = this.sortModelsByHardwareCompatibility(suitableModels);
    const primaryModel = sortedModels[0];
    const secondaryModel = sortedModels[1] ?? primaryModel;

    // Create fallback chain - only include models smaller or equal in size to primary
    const primaryModelSize = this.estimateModelMemoryUsage(primaryModel);
    this.fallbackModels = sortedModels
      .slice(1, 5)
      .filter(m => this.estimateModelMemoryUsage(m) <= primaryModelSize)
      .slice(0, 3); // Keep top 3 smaller/equal fallbacks

    return {
      writer: {
        model: primaryModel.name,
        platform: primaryModel.platform,
        reasoning: `Hardware-optimized: ${primaryModel.size} model fits ${this.hardwareProfile.availableMemoryGB.toFixed(1)}GB available memory`,
      },
      auditor: {
        model: secondaryModel.name,
        platform: secondaryModel.platform,
        reasoning: `Secondary model for review tasks`,
      },
      confidence: this.calculateHardwareConfidence(primaryModel),
    };
  }

  /**
   * Filter models that can run on the current hardware.
   *
   * A model passes when its estimated memory fits within a fraction (the
   * safety margin) of free memory and the host has enough CPU cores.
   */
  private filterModelsByHardware(models: ModelInfo[]): ModelInfo[] {
    // The margin depends only on the hardware profile, so compute it once.
    const currentMemoryUsage =
      1 - this.hardwareProfile.availableMemoryGB / this.hardwareProfile.totalMemoryGB;

    let safetyMargin = 0.7; // Default: a model may use up to 70% of free memory
    if (currentMemoryUsage > 0.8) {
      safetyMargin = 0.5; // High memory pressure: allow only 50% of free memory
    } else if (currentMemoryUsage > 0.7) {
      safetyMargin = 0.6; // Moderate adjustment
    }

    const memoryBudgetGB = this.hardwareProfile.availableMemoryGB * safetyMargin;

    return models.filter(model => {
      const estimatedMemoryGB = this.estimateModelMemoryUsage(model);
      const memoryFitsWithBuffer = estimatedMemoryGB <= memoryBudgetGB;
      const cpuSuitable = this.hardwareProfile.cpuCores >= this.getMinCoresForModel(model);

      this.logger.debug(
        `Model ${model.name}: memory=${estimatedMemoryGB.toFixed(1)}GB, fits=${memoryFitsWithBuffer}, cpu=${cpuSuitable}, margin=${(safetyMargin * 100).toFixed(0)}%`
      );

      return memoryFitsWithBuffer && cpuSuitable;
    });
  }

  /**
   * Sort models by hardware compatibility score, best first.
   *
   * qwen2.5-coder variants always rank ahead of other models. The input array
   * is left untouched; a sorted copy is returned.
   */
  private sortModelsByHardwareCompatibility(models: ModelInfo[]): ModelInfo[] {
    return [...models].sort((a, b) => {
      // Give priority to qwen2.5-coder
      const aIsQwenCoder = a.name.toLowerCase().includes('qwen2.5-coder');
      const bIsQwenCoder = b.name.toLowerCase().includes('qwen2.5-coder');
      if (aIsQwenCoder && !bIsQwenCoder) return -1;
      if (!aIsQwenCoder && bIsQwenCoder) return 1;

      const scoreA = this.calculateHardwareCompatibilityScore(a);
      const scoreB = this.calculateHardwareCompatibilityScore(b);
      return scoreB - scoreA;
    });
  }

  /**
   * Calculate a hardware compatibility score (higher is better).
   *
   * Combines memory headroom (up to 40 points), speed (10-30), quality
   * (10-20), and a 10-point bonus for "gpu"-named models on GPU hosts.
   */
  private calculateHardwareCompatibilityScore(model: ModelInfo): number {
    let score = 0;

    // Memory efficiency: share of free memory left over after loading the model
    const memoryUsage = this.estimateModelMemoryUsage(model);
    const memoryEfficiency = 1 - memoryUsage / this.hardwareProfile.availableMemoryGB;
    score += memoryEfficiency * 40;

    // Performance characteristics
    const speedBonus =
      model.performance.speed === 'fast' ? 30 : model.performance.speed === 'medium' ? 20 : 10;
    score += speedBonus;

    // Quality vs resource trade-off
    const qualityBonus =
      model.performance.quality === 'excellent'
        ? 20
        : model.performance.quality === 'good'
          ? 15
          : 10;
    score += qualityBonus;

    // Hardware-specific bonuses
    if (this.hardwareProfile.hasGPU && model.name.includes('gpu')) {
      score += 10; // GPU acceleration bonus
    }

    return score;
  }

  /**
   * Estimate memory usage for a model, in GB.
   *
   * Uses the on-disk size when known; otherwise falls back to the same
   * name-based heuristic as `estimateModelMemoryUsageByName`, so both
   * estimators agree for a given model name.
   */
  private estimateModelMemoryUsage(model: ModelInfo): number {
    if (model.sizeBytes) {
      // Rule of thumb: model needs 1.2x its size in RAM for inference
      return (model.sizeBytes / HardwareAwareModelSelector.BYTES_PER_GB) * 1.2;
    }
    return this.estimateModelMemoryUsageByName(model.name);
  }

  /**
   * Get the minimum CPU cores required for a model, based on its estimated
   * memory footprint.
   */
  private getMinCoresForModel(model: ModelInfo): number {
    const memoryGB = this.estimateModelMemoryUsage(model);
    if (memoryGB > 20) return 8; // Large models need more cores
    if (memoryGB > 10) return 4;
    return 2; // Minimum for any model
  }

  /**
   * Estimate memory usage (GB) from a model name alone.
   *
   * Matching is by case-insensitive substring and the first rule that matches
   * wins, so larger size tags are tested before smaller ones they contain
   * (e.g. "72b" before "2b"). Tags not listed here can still hit a shorter
   * rule (e.g. "12b" matches "2b"); unknown names get the 4 GB default.
   */
  private estimateModelMemoryUsageByName(modelName: string): number {
    const nameLower = modelName.toLowerCase();
    if (nameLower.includes('72b') || nameLower.includes('70b')) return 40;
    if (nameLower.includes('34b') || nameLower.includes('32b') || nameLower.includes('30b')) {
      return 20;
    }
    if (nameLower.includes('13b') || nameLower.includes('14b')) return 8;
    if (nameLower.includes('7b') || nameLower.includes('8b')) return 4;
    if (nameLower.includes('3b') || nameLower.includes('2b')) return 2;
    if (nameLower.includes('qwen2.5-coder')) return 4; // Qwen 2.5 Coder 7B
    if (nameLower.includes('llama3.2')) return 2; // Llama 3.2 is small
    return 4; // Default estimate
  }

  /**
   * Calculate confidence in running `model` on this hardware.
   *
   * @returns A value clamped to the range 0.1..1.0.
   */
  private calculateHardwareConfidence(model: ModelInfo): number {
    const memoryUsage = this.estimateModelMemoryUsage(model);
    const memoryUtilization = memoryUsage / this.hardwareProfile.availableMemoryGB;

    let confidence = 1.0;

    // Reduce confidence based on memory pressure
    if (memoryUtilization > 0.8) confidence -= 0.4;
    else if (memoryUtilization > 0.6) confidence -= 0.2;
    else if (memoryUtilization > 0.4) confidence -= 0.1;

    // CPU considerations
    const minCores = this.getMinCoresForModel(model);
    if (this.hardwareProfile.cpuCores < minCores) confidence -= 0.3;

    // GPU bonus
    if (this.hardwareProfile.hasGPU) confidence += 0.1;

    // Keep the result a valid confidence: the GPU bonus must not push it past 1.
    return Math.min(Math.max(confidence, 0.1), 1.0);
  }

  /**
   * Record performance metrics for a model and trigger an automatic switch if
   * the merged metrics breach a threshold.
   *
   * @param modelName Model the metrics belong to.
   * @param metrics Fields to merge over the stored metrics. When `errorRate`
   *   is supplied, a value above 0 increments `consecutiveErrors`; 0 resets
   *   it and refreshes `lastSuccessTime`.
   */
  recordPerformance(modelName: string, metrics: Partial<PerformanceMetrics>): void {
    const existing = this.performanceMetrics.get(modelName) || {
      responseTime: 0,
      memoryUsage: 0,
      cpuUsage: 0,
      errorRate: 0,
      tokensPerSecond: 0,
      consecutiveErrors: 0,
      lastSuccessTime: Date.now(),
    };

    const updated = { ...existing, ...metrics };

    // Update consecutive errors
    if (metrics.errorRate !== undefined) {
      if (metrics.errorRate > 0) {
        updated.consecutiveErrors = existing.consecutiveErrors + 1;
      } else {
        updated.consecutiveErrors = 0;
        updated.lastSuccessTime = Date.now();
      }
    }

    this.performanceMetrics.set(modelName, updated);

    // Check if automatic switching is needed
    this.checkForAutomaticSwitch(modelName, updated);
  }

  /**
   * Check whether the active model should be switched automatically.
   *
   * Does nothing unless `modelName` is the current model and no switch is
   * already in progress.
   */
  private checkForAutomaticSwitch(modelName: string, metrics: PerformanceMetrics): void {
    if (this.switchingInProgress || !this.currentModel || this.currentModel !== modelName) {
      return;
    }

    let switchReason: ModelSwitchEvent['reason'] | null = null;

    // Check various failure conditions
    if (metrics.responseTime > this.thresholds.maxResponseTime) {
      switchReason = 'timeout';
    } else if (metrics.memoryUsage > this.thresholds.maxMemoryUsage) {
      switchReason = 'oom';
    } else if (metrics.errorRate > this.thresholds.maxErrorRate) {
      switchReason = 'error_threshold';
    } else if (metrics.consecutiveErrors >= this.thresholds.maxConsecutiveErrors) {
      switchReason = 'error_threshold';
    } else if (
      metrics.tokensPerSecond < this.thresholds.minTokensPerSecond &&
      metrics.tokensPerSecond > 0
    ) {
      switchReason = 'performance_degradation';
    }

    if (switchReason) {
      // Fire-and-forget on purpose: performAutomaticSwitch handles its own errors.
      void this.performAutomaticSwitch(switchReason, metrics);
    }
  }

  /**
   * Perform an automatic model switch and emit `modelSwitch` on success.
   *
   * Returns without switching when a switch is already running, no fallbacks
   * are registered, or no suitable fallback can be selected.
   */
  private async performAutomaticSwitch(
    reason: ModelSwitchEvent['reason'],
    metrics: PerformanceMetrics
  ): Promise<void> {
    if (this.switchingInProgress || this.fallbackModels.length === 0) {
      return;
    }

    this.switchingInProgress = true;
    // currentModel may still be unset (e.g. a forced switch before setCurrentModel).
    const originalModel = this.currentModel ?? 'unknown';

    try {
      this.logger.warn(`Switching model due to ${reason}:`, {
        from: originalModel,
        metrics: {
          responseTime: metrics.responseTime,
          memoryUsage: (metrics.memoryUsage * 100).toFixed(1) + '%',
          errorRate: (metrics.errorRate * 100).toFixed(1) + '%',
          consecutiveErrors: metrics.consecutiveErrors,
        },
      });

      // Find next suitable model
      const nextModel = this.selectFallbackModel(reason, metrics);
      if (!nextModel) {
        this.logger.error('No suitable fallback models available');
        return;
      }

      this.currentModel = nextModel.name;

      // Emit switch event
      const switchEvent: ModelSwitchEvent = {
        reason,
        fromModel: originalModel,
        toModel: nextModel.name,
        metrics,
        hardwareProfile: this.hardwareProfile,
        timestamp: new Date(),
      };
      this.emit('modelSwitch', switchEvent);

      this.logger.info(`Successfully switched to ${nextModel.name} (${nextModel.size})`);
    } catch (error) {
      this.logger.error('Failed to switch model:', error);
    } finally {
      this.switchingInProgress = false;
    }
  }

  /**
   * Select a fallback model appropriate for the failure reason.
   *
   * @returns The chosen fallback, or `null` when the current model should be
   *   kept or no other model is available.
   */
  private selectFallbackModel(
    reason: ModelSwitchEvent['reason'],
    metrics: PerformanceMetrics
  ): ModelInfo | null {
    const isMemoryIssue = reason === 'oom' || reason === 'hardware_constraint';

    // Check if current model is already small/efficient
    if (this.currentModel) {
      const currentModelSize = this.estimateModelMemoryUsageByName(this.currentModel);

      // Already on a small model (<=4GB) with memory issues: switching will not help
      if (isMemoryIssue && currentModelSize <= 4) {
        this.logger.info(
          `Keeping current efficient model ${this.currentModel} (${currentModelSize}GB) despite memory pressure`
        );
        return null;
      }
    }

    // Never "switch" to the model that is already active.
    const sortedFallbacks = this.fallbackModels.filter(m => m.name !== this.currentModel);

    // Sort fallbacks based on failure reason
    if (isMemoryIssue) {
      // Prioritize smaller models for memory issues
      sortedFallbacks.sort(
        (a, b) => this.estimateModelMemoryUsage(a) - this.estimateModelMemoryUsage(b)
      );
    } else if (reason === 'timeout' || reason === 'performance_degradation') {
      // Prioritize faster models for performance issues
      const speedOrder = { fast: 3, medium: 2, slow: 1 };
      sortedFallbacks.sort(
        (a, b) => speedOrder[b.performance.speed] - speedOrder[a.performance.speed]
      );
    }

    // Find first fallback that hasn't been problematic
    for (const model of sortedFallbacks) {
      const modelMetrics = this.performanceMetrics.get(model.name);
      if (
        !modelMetrics ||
        (modelMetrics.consecutiveErrors < this.thresholds.maxConsecutiveErrors &&
          modelMetrics.errorRate < this.thresholds.maxErrorRate)
      ) {
        return model;
      }
    }

    // Every candidate has a poor record: fall back to the top-ranked one
    return sortedFallbacks[0] || null;
  }

  /**
   * Set the model whose metrics drive automatic switching.
   */
  setCurrentModel(modelName: string): void {
    this.currentModel = modelName;
    this.logger.info(`Now monitoring model: ${modelName}`);
  }

  /**
   * Start the periodic hardware poll (every 30 seconds).
   *
   * The timer is stored so `destroy()` can clear it, and is unref'd so it
   * does not keep the process alive.
   */
  private startPerformanceMonitoring(): void {
    this.monitoringInterval = setInterval(() => {
      try {
        this.updateHardwareProfile();
      } catch (error) {
        this.logger.error('Hardware monitoring error:', error);
      }
    }, 30000); // Update every 30 seconds

    // Prevent the interval from keeping the process alive
    this.monitoringInterval.unref?.();
  }

  /**
   * Refresh free-memory figures and, under critical memory pressure, ask for
   * an automatic switch away from a large (>4GB estimated) current model.
   */
  private updateHardwareProfile(): void {
    const freeMemoryGB = os.freemem() / HardwareAwareModelSelector.BYTES_PER_GB;
    this.hardwareProfile.availableMemoryGB = freeMemoryGB;

    const memoryUsage = 1 - freeMemoryGB / this.hardwareProfile.totalMemoryGB;

    // Nothing to do without an active model or below the hard memory limit.
    // (Usage between the warning threshold and the limit is not reported.)
    if (!this.currentModel || memoryUsage <= this.thresholds.maxMemoryUsage) {
      return;
    }

    // Don't switch if we're already using an efficient model (<=4GB)
    const currentModelSize = this.estimateModelMemoryUsageByName(this.currentModel);
    if (currentModelSize <= 4) {
      return;
    }

    const metrics: PerformanceMetrics = {
      responseTime: 0,
      memoryUsage,
      cpuUsage: 0,
      errorRate: 0,
      tokensPerSecond: 0,
      consecutiveErrors: 0,
      lastSuccessTime: Date.now(),
    };
    this.checkForAutomaticSwitch(this.currentModel, metrics);
  }

  /**
   * Get a performance report.
   *
   * @returns Hardware profile, current model, fallback list, thresholds and
   *   per-model metrics. Hardware and thresholds are shallow copies so callers
   *   cannot mutate the selector's internal state through the report.
   */
  getPerformanceReport(): any {
    return {
      hardware: { ...this.hardwareProfile },
      currentModel: this.currentModel,
      fallbackModels: this.fallbackModels.map(m => ({ name: m.name, size: m.size })),
      thresholds: { ...this.thresholds },
      metrics: Object.fromEntries(this.performanceMetrics),
    };
  }

  /**
   * Force a model switch (intended for testing).
   *
   * @param targetModel Optional fallback model name to switch to directly.
   *   If omitted or not among the registered fallbacks, the automatic
   *   fallback selection runs with reason `hardware_constraint`.
   * @returns `true` when the active model actually changed (or the requested
   *   target was applied); `false` when a switch was already in progress or
   *   nothing was switched.
   */
  async forceModelSwitch(targetModel?: string): Promise<boolean> {
    if (this.switchingInProgress) return false;

    // Capture before any assignment so the event reports the real origin.
    const previousModel = this.currentModel;

    const metrics: PerformanceMetrics = {
      responseTime: 0,
      memoryUsage: 0.5,
      cpuUsage: 0.5,
      errorRate: 0,
      tokensPerSecond: 1,
      consecutiveErrors: 0,
      lastSuccessTime: Date.now(),
    };

    if (targetModel) {
      const targetModelInfo = this.fallbackModels.find(m => m.name === targetModel);
      if (targetModelInfo) {
        this.currentModel = targetModel;
        const switchEvent: ModelSwitchEvent = {
          reason: 'hardware_constraint',
          fromModel: previousModel ?? 'unknown',
          toModel: targetModel,
          metrics,
          hardwareProfile: this.hardwareProfile,
          timestamp: new Date(),
        };
        this.emit('modelSwitch', switchEvent);
        return true;
      }
    }

    await this.performAutomaticSwitch('hardware_constraint', metrics);
    // Report what really happened rather than assuming success.
    return this.currentModel !== previousModel;
  }

  /**
   * Cleanup resources: stops the hardware poll and removes all listeners.
   */
  destroy(): void {
    if (this.monitoringInterval) {
      clearInterval(this.monitoringInterval);
      this.monitoringInterval = null;
    }
    this.removeAllListeners();
  }
}

export default HardwareAwareModelSelector;