UNPKG

olbench

Version:

Comprehensive Node.js-based benchmarking tool for Ollama local LLMs

github.com/zaakirio/olbench

zaakirio/olbench

304 lines • 10.8 kB

JavaScript

/**
 * Catalogue of Ollama models grouped into RAM tiers.
 *
 * Each tier has:
 *  - `name`      display label,
 *  - `ramRange`  inclusive `[min, max]` bounds used by `ModelTierManager.getTierByRAM`,
 *  - `priority`  ordinal of the tier,
 *  - `models`    entries with `name`, per-tier `priority` (lower = preferred),
 *                `description`, `memoryRequirement` (compared against RAM figures
 *                in `getHardwareAwareRecommendations`), optional `cpuOptimized` /
 *                `gpuOptimized` flags, and `quantization`.
 *
 * The same model name may appear in several tiers with a different priority.
 */
export const MODEL_TIERS = [
  {
    name: 'Tier 1 (4GB-7GB)',
    ramRange: [4, 7],
    priority: 1,
    models: [
      {
        name: 'deepseek-r1:1.5b',
        priority: 1,
        description: 'DeepSeek R1 1.5B - Lightweight reasoning model',
        memoryRequirement: 1.2,
        cpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'gemma:2b',
        priority: 2,
        description: 'Google Gemma 2B - Efficient small model',
        memoryRequirement: 1.5,
        cpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'phi:2.7b',
        priority: 3,
        description: 'Microsoft Phi 2.7B - Small but capable',
        memoryRequirement: 2.0,
        cpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'phi3:3.8b',
        priority: 4,
        description: 'Microsoft Phi-3 3.8B - Enhanced small model',
        memoryRequirement: 2.8,
        cpuOptimized: true,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
    ],
  },
  {
    name: 'Tier 2 (8GB-15GB)',
    ramRange: [8, 15],
    priority: 2,
    models: [
      {
        name: 'phi3:3.8b',
        priority: 1,
        description: 'Microsoft Phi-3 3.8B - Enhanced small model',
        memoryRequirement: 2.8,
        cpuOptimized: true,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'gemma2:9b',
        priority: 2,
        description: 'Google Gemma 2 9B - Balanced performance',
        memoryRequirement: 5.5,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'mistral:7b',
        priority: 3,
        description: 'Mistral 7B - High-performance medium model',
        memoryRequirement: 4.1,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'llama3.1:8b',
        priority: 4,
        description: 'Meta Llama 3.1 8B - Latest Llama model',
        memoryRequirement: 4.7,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'deepseek-r1:8b',
        priority: 5,
        description: 'DeepSeek R1 8B - Medium reasoning model',
        memoryRequirement: 4.9,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'llava:7b',
        priority: 6,
        description: 'LLaVA 7B - Multimodal vision-language model',
        memoryRequirement: 4.5,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
    ],
  },
  {
    name: 'Tier 3 (16GB-31GB)',
    ramRange: [16, 31],
    priority: 3,
    models: [
      {
        name: 'gemma2:9b',
        priority: 1,
        description: 'Google Gemma 2 9B - Balanced performance',
        memoryRequirement: 5.5,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'mistral:7b',
        priority: 2,
        description: 'Mistral 7B - High-performance medium model',
        memoryRequirement: 4.1,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'phi4:14b',
        priority: 3,
        description: 'Microsoft Phi-4 14B - Advanced reasoning',
        memoryRequirement: 8.2,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'deepseek-r1:8b',
        priority: 4,
        description: 'DeepSeek R1 8B - Medium reasoning model',
        memoryRequirement: 4.9,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'deepseek-r1:14b',
        priority: 5,
        description: 'DeepSeek R1 14B - Large reasoning model',
        memoryRequirement: 8.5,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'llava:7b',
        priority: 6,
        description: 'LLaVA 7B - Multimodal vision-language model',
        memoryRequirement: 4.5,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'llava:13b',
        priority: 7,
        description: 'LLaVA 13B - Large multimodal model',
        memoryRequirement: 7.8,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
    ],
  },
  {
    name: 'Tier 4 (32GB+)',
    ramRange: [32, Infinity],
    priority: 4,
    models: [
      {
        name: 'phi4:14b',
        priority: 1,
        description: 'Microsoft Phi-4 14B - Advanced reasoning',
        memoryRequirement: 8.2,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'deepseek-r1:14b',
        priority: 2,
        description: 'DeepSeek R1 14B - Large reasoning model',
        memoryRequirement: 8.5,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
      {
        name: 'deepseek-r1:32b',
        priority: 3,
        description: 'DeepSeek R1 32B - Extra large reasoning model',
        memoryRequirement: 18.9,
        gpuOptimized: true,
        quantization: 'Q4_0',
      },
    ],
  },
];

/**
 * Walk the tiers in order and keep the first entry seen for each model name
 * that also satisfies `accept`.
 *
 * @param {Array<{models: Array<object>}>} tiers - Tiers to scan, in order.
 * @param {(model: object) => boolean} [accept] - Optional filter; defaults to accepting everything.
 * @returns {Array<object>} Unique model entries in first-seen order.
 */
function collectUniqueModels(tiers, accept = () => true) {
  const byName = new Map();
  for (const tier of tiers) {
    for (const model of tier.models) {
      if (accept(model) && !byName.has(model.name)) {
        byName.set(model.name, model);
      }
    }
  }
  return Array.from(byName.values());
}

/**
 * Compute the hardware-fit score of one model. Higher is better.
 *
 * @param {object} model - Model entry (`priority`, `memoryRequirement`, optional `cpuOptimized` / `gpuOptimized`).
 * @param {object} systemInfo - Reads `hasCUDA`, `hasGPU`, `architecture`, `os`.
 * @param {number} effectiveRAM - RAM budget the model must fit in; callers only pass
 *   models with `memoryRequirement <= effectiveRAM`, so it is positive here.
 * @returns {number} The score.
 */
function scoreModelForHardware(model, systemInfo, effectiveRAM) {
  // Base score: lower priority number means a higher score.
  let score = 100 - model.priority;

  // GPU scoring: strong preference on CUDA systems, milder on other GPUs.
  if (systemInfo.hasCUDA && model.gpuOptimized) {
    score += 50;
  } else if (systemInfo.hasGPU && model.gpuOptimized) {
    score += 20;
  }

  // CPU scoring: prefer CPU-optimized models on CPU-only systems.
  if (!systemInfo.hasGPU && model.cpuOptimized) {
    score += 30;
  }

  // Apple Silicon (arm64 + darwin) gets an extra bump for CPU-optimized models.
  if (systemInfo.architecture === 'arm64' && systemInfo.os === 'darwin' && model.cpuOptimized) {
    score += 15;
  }

  // Memory efficiency: reward 50-80% utilization, penalize below 30%.
  const memoryUtilization = model.memoryRequirement / effectiveRAM;
  if (memoryUtilization >= 0.5 && memoryUtilization <= 0.8) {
    score += 10;
  } else if (memoryUtilization < 0.3) {
    score -= 10;
  }

  // Model size bonus: prefer larger models among those that fit.
  score += Math.log(model.memoryRequirement) * 5;
  return score;
}

/**
 * Lookup and recommendation helpers over the RAM tier catalogue.
 */
export class ModelTierManager {
  /** Tier list consulted by every method; defaults to the shared `MODEL_TIERS` array. */
  tiers = MODEL_TIERS;

  /**
   * Find the tier whose RAM range contains `ramGB`.
   *
   * Tier bounds are whole numbers (e.g. 4-7, 8-15), so the value is floored
   * first; otherwise a fractional reading such as 7.5 or 15.6 would fall in
   * the gap between two tiers and match nothing.
   *
   * @param {number} ramGB - RAM amount to classify.
   * @returns {object|null} The matching tier, or `null` when none matches
   *   (e.g. below the smallest tier, or `NaN`).
   */
  getTierByRAM(ramGB) {
    const wholeGB = Math.floor(ramGB);
    return (
      this.tiers.find(({ ramRange: [min, max] }) => wholeGB >= min && wholeGB <= max) || null
    );
  }

  /**
   * Find a tier by its exact `name`.
   *
   * @param {string} tierName - Tier name to match with strict equality.
   * @returns {object|null} The tier, or `null` if no tier has that name.
   */
  getTierByName(tierName) {
    return this.tiers.find((tier) => tier.name === tierName) || null;
  }

  /**
   * @returns {Array<object>} A shallow copy of the tier list (tier objects are shared).
   */
  getAllTiers() {
    return [...this.tiers];
  }

  /**
   * List the models of the tier matching `ramGB`, best priority first.
   *
   * @param {number} ramGB - RAM amount used to pick the tier.
   * @returns {Array<object>} Sorted copy of the tier's models; empty when no tier matches.
   */
  getModelsForRAM(ramGB) {
    const tier = this.getTierByRAM(ramGB);
    if (!tier) {
      return [];
    }
    // Sort a copy so the catalogue order is untouched (lower number = higher priority).
    return [...tier.models].sort((a, b) => a.priority - b.priority);
  }

  /**
   * Find the first model entry with the given name, scanning tiers in order.
   *
   * @param {string} modelName - Model name to match with strict equality.
   * @returns {object|null} The first matching entry, or `null`.
   */
  getModelByName(modelName) {
    for (const tier of this.tiers) {
      const model = tier.models.find((m) => m.name === modelName);
      if (model) {
        return model;
      }
    }
    return null;
  }

  /**
   * Get the top `count` models of the tier matching `ramGB`.
   *
   * @param {number} ramGB - RAM amount used to pick the tier.
   * @param {number} [count=3] - Maximum number of models to return.
   * @returns {Array<object>} Up to `count` models, best priority first.
   */
  getRecommendedModels(ramGB, count = 3) {
    return this.getModelsForRAM(ramGB).slice(0, count);
  }

  /**
   * Recommend models based on the machine's hardware rather than its tier.
   *
   * Every unique model (first occurrence across tiers) whose `memoryRequirement`
   * fits in `systemInfo.availableRAM - 2` is scored and the `count` highest
   * scores are returned.
   *
   * @param {object} systemInfo - Reads `availableRAM`, `hasCUDA`, `hasGPU`, `architecture`, `os`.
   * @param {number} [count=3] - Maximum number of models to return.
   * @returns {Array<object>} Up to `count` models, highest score first; empty when nothing fits.
   */
  getHardwareAwareRecommendations(systemInfo, count = 3) {
    // Effective RAM: available minus a 2 (GB) buffer, never negative.
    const effectiveRAM = Math.max(0, systemInfo.availableRAM - 2);

    const viableModels = collectUniqueModels(
      this.tiers,
      (model) => model.memoryRequirement <= effectiveRAM,
    );

    return viableModels
      .map((model) => ({ model, score: scoreModelForHardware(model, systemInfo, effectiveRAM) }))
      .sort((a, b) => b.score - a.score)
      .slice(0, count)
      .map(({ model }) => model);
  }

  /**
   * Check whether a model belongs to the tier matching `ramGB`.
   *
   * @param {string} modelName - Model name to look for.
   * @param {number} ramGB - RAM amount used to pick the tier.
   * @returns {boolean} `true` if the tier for `ramGB` lists the model.
   */
  isModelAvailable(modelName, ramGB) {
    return this.getModelsForRAM(ramGB).some((m) => m.name === modelName);
  }

  /**
   * Get every distinct model across all tiers (first occurrence of each name).
   *
   * @returns {Array<object>} Unique model entries in first-seen order.
   */
  getAllModels() {
    return collectUniqueModels(this.tiers);
  }

  /**
   * Render a tier as a multi-line, human-readable summary.
   *
   * @param {object} tier - Tier with `name`, `ramRange` and `models`.
   * @returns {string} Lines joined with a newline character.
   */
  formatTierInfo(tier) {
    const [minRAM, maxRAM] = tier.ramRange;
    const upperBound = maxRAM === Infinity ? '∞' : `${maxRAM}GB`;
    const lines = [
      `${tier.name}:`,
      ` RAM Range: ${minRAM}GB - ${upperBound}`,
      ` Models (${tier.models.length}):`,
    ];
    tier.models.forEach((model, index) => {
      const suffix = model.description ? ` - ${model.description}` : '';
      lines.push(` ${index + 1}. ${model.name}${suffix}`);
    });
    // A real newline; the previous '\\n' emitted a literal backslash followed by "n".
    return lines.join('\n');
  }
}
//# sourceMappingURL=model-tiers.js.map