llm-checker
Version:
Intelligent CLI tool with AI-powered model selection that analyzes your hardware and recommends optimal LLM models for your system
415 lines (347 loc) • 15.5 kB
JavaScript
/**
 * Heuristic recommender that ranks LLM models per use-case category against
 * the capabilities of the user's hardware.
 *
 * Hardware objects are expected to look like
 * `{ memory: { total }, gpu: { vram }, cpu: { cores, architecture } }`.
 * Model objects are plain records; the fields read here are `model_name`,
 * `model_identifier`, `description`, `detailed_description`, `use_cases`,
 * `category`, `pulls`, `last_updated`, `quantizations`, `model_sizes` and
 * `tags`. All of them are optional.
 */
class IntelligentModelRecommender {
    constructor() {
        // Per-category matching configuration:
        //   weight           - multiplier applied to the final score (see applyBonuses)
        //   keywords         - substrings searched for in the model's text fields
        //   preferredSizes   - informational; not read by any method in this class
        //   hardwareMinimums - informational; not read by any method in this class
        this.categories = {
            coding: {
                weight: 1.0,
                keywords: ['code', 'programming', 'development', 'coder', 'deepseek', 'codellama'],
                preferredSizes: ['1b', '3b', '7b', '13b'],
                hardwareMinimums: { ram: 4, vram: 2, cpu_cores: 4 }
            },
            reading: {
                weight: 1.0,
                keywords: ['reading', 'comprehension', 'text', 'analysis', 'llama', 'mistral'],
                preferredSizes: ['3b', '7b', '13b', '20b'],
                hardwareMinimums: { ram: 4, vram: 1, cpu_cores: 2 }
            },
            talking: {
                weight: 1.0,
                keywords: ['chat', 'conversation', 'assistant', 'phi', 'gemma'],
                preferredSizes: ['1b', '3b', '7b'],
                hardwareMinimums: { ram: 2, vram: 1, cpu_cores: 2 }
            },
            reasoning: {
                weight: 1.2,
                keywords: ['reason', 'logic', 'math', 'problem', 'deepseek-r1', 'qwen'],
                preferredSizes: ['7b', '14b', '32b', '70b'],
                hardwareMinimums: { ram: 8, vram: 4, cpu_cores: 6 }
            },
            multimodal: {
                weight: 1.1,
                keywords: ['vision', 'image', 'multimodal', 'llava', 'pixtral'],
                preferredSizes: ['7b', '12b', '22b'],
                hardwareMinimums: { ram: 8, vram: 6, cpu_cores: 4 }
            },
            creative: {
                weight: 0.9,
                keywords: ['creative', 'writing', 'story', 'novel', 'llama', 'mistral'],
                preferredSizes: ['7b', '13b', '20b'],
                hardwareMinimums: { ram: 6, vram: 2, cpu_cores: 4 }
            },
            general: {
                weight: 1.0,
                keywords: ['general', 'assistant', 'helper'],
                preferredSizes: ['1b', '3b', '7b', '13b'],
                hardwareMinimums: { ram: 2, vram: 1, cpu_cores: 2 }
            }
        };
    }

    /**
     * Recommend the best models for every configured category.
     *
     * Logs a one-line progress message to the console as a side effect.
     *
     * @param {object} hardware - Hardware description (memory/gpu/cpu).
     * @param {object[]} allModels - Candidate models; a non-array is treated as empty.
     * @returns {object} Map of category name to
     *   `{ tier, bestModels, totalEvaluated, category }`.
     */
    getBestModelsForHardware(hardware, allModels) {
        const models = Array.isArray(allModels) ? allModels : [];
        const hardwareTier = this.calculateHardwareTier(hardware);
        const recommendations = {};

        console.log(`🧠 Analyzing ${models.length} models for hardware tier: ${hardwareTier}`);

        for (const category of Object.keys(this.categories)) {
            const categoryModels = this.filterModelsByCategory(models, category);
            const scoredModels = this.scoreModelsForCategory(categoryModels, category, hardware);

            recommendations[category] = {
                tier: hardwareTier,
                bestModels: this.selectBestModels(scoredModels, hardwareTier, 3),
                totalEvaluated: categoryModels.length,
                category: this.categories[category]
            };
        }

        return recommendations;
    }

    /**
     * Read RAM, VRAM and core count from a hardware object, tolerating missing
     * sections (for example a machine that reports no `gpu` entry).
     *
     * @param {object} hardware - Hardware description; may be partial.
     * @returns {{ram: number, vram: number, cores: number}} Missing or falsy
     *   values are reported as 0.
     */
    readHardwareSpecs(hardware) {
        const { memory, gpu, cpu } = hardware || {};
        return {
            ram: (memory && memory.total) || 0,
            vram: (gpu && gpu.vram) || 0,
            cores: (cpu && cpu.cores) || 0
        };
    }

    /**
     * Classify hardware into one of five tiers.
     *
     * Each resource is normalised to 0-100 against a reference value
     * (64 for RAM, 32 for VRAM, 16 for cores) and combined with weights
     * 0.5 / 0.2 / 0.3 respectively.
     *
     * @param {object} hardware - Hardware description.
     * @returns {'ultra_high'|'high'|'medium'|'low'|'ultra_low'} Tier name.
     */
    calculateHardwareTier(hardware) {
        const { ram, vram, cores } = this.readHardwareSpecs(hardware);

        const ramScore = Math.min(100, (ram / 64) * 100);
        const vramScore = Math.min(100, (vram / 32) * 100);
        const cpuScore = Math.min(100, (cores / 16) * 100);

        const totalScore = (ramScore * 0.5) + (cpuScore * 0.3) + (vramScore * 0.2);

        if (totalScore >= 80) return 'ultra_high';
        if (totalScore >= 60) return 'high';
        if (totalScore >= 40) return 'medium';
        if (totalScore >= 20) return 'low';
        return 'ultra_low';
    }

    /**
     * Keep the models relevant to a category.
     *
     * A model is kept when any category keyword occurs (case-insensitive
     * substring) in its name, descriptions, use cases, identifier or category.
     * Models whose `category` is `'general'` are additionally kept for the
     * `talking`, `reading` and `general` categories.
     *
     * @param {object[]} allModels - Candidate models.
     * @param {string} category - Category key from `this.categories`.
     * @returns {object[]} Matching models; empty for an unknown category.
     */
    filterModelsByCategory(allModels, category) {
        const categoryInfo = this.categories[category];
        if (!categoryInfo) return [];

        const acceptsGeneralModels = ['talking', 'reading', 'general'].includes(category);

        return allModels.filter(model => {
            // undefined/null fields become empty strings when joined.
            const searchText = [
                model.model_name,
                model.description,
                model.detailed_description,
                ...(model.use_cases || []),
                model.model_identifier,
                model.category
            ].join(' ').toLowerCase();

            const matchesKeywords = categoryInfo.keywords.some(keyword =>
                searchText.includes(keyword.toLowerCase())
            );

            return matchesKeywords || (acceptsGeneralModels && model.category === 'general');
        });
    }

    /**
     * Score every model for a category.
     *
     * The raw score starts at 50 and adds 40% of the hardware score, 30% of the
     * specialization score, 20% of the popularity score and 10% of the
     * efficiency score; penalties and bonuses are then applied and the result
     * is clamped to 0-100. Because of the base of 50 the raw value can exceed
     * 100 before clamping.
     *
     * @param {object[]} models - Models to score.
     * @param {string} category - Category key from `this.categories`.
     * @param {object} hardware - Hardware description.
     * @returns {object[]} Copies of the models with `categoryScore`,
     *   `hardwareScore`, `specializationScore`, `popularityScore` and
     *   `efficiencyScore` added; empty for an unknown category.
     */
    scoreModelsForCategory(models, category, hardware) {
        // Mirror filterModelsByCategory: an unknown category yields no results.
        if (!this.categories[category]) return [];

        return models.map(model => {
            const hardwareScore = this.calculateHardwareCompatibility(model, hardware);
            const specializationScore = this.calculateSpecializationScore(model, category);
            const popularityScore = this.calculatePopularityScore(model);
            const efficiencyScore = this.calculateEfficiencyScore(model, hardware);

            let score = 50
                + hardwareScore * 0.4
                + specializationScore * 0.3
                + popularityScore * 0.2
                + efficiencyScore * 0.1;

            score = this.applyPenalties(score, model, hardware);
            score = this.applyBonuses(score, model, category, hardware);

            return {
                ...model,
                categoryScore: Math.min(100, Math.max(0, score)),
                hardwareScore,
                specializationScore,
                popularityScore,
                efficiencyScore
            };
        });
    }

    /**
     * Rate how comfortably the hardware can run a model.
     *
     * Up to 40 points come from RAM headroom (or -20 when RAM is below the
     * estimate), up to 30 from VRAM headroom and up to 30 from core count.
     *
     * @param {object} model - Model record.
     * @param {object} hardware - Hardware description.
     * @returns {number} Score clamped to 0-100.
     */
    calculateHardwareCompatibility(model, hardware) {
        const { ram, vram, cores } = this.readHardwareSpecs(hardware);
        const estimatedRAM = this.estimateRAMRequirement(model);
        const estimatedVRAM = this.estimateVRAMRequirement(model);
        let score = 0;

        // estimatedRAM is always >= 1, so this division is safe.
        const ramRatio = ram / estimatedRAM;
        if (ramRatio >= 3) score += 40;
        else if (ramRatio >= 2) score += 30;
        else if (ramRatio >= 1.5) score += 20;
        else if (ramRatio >= 1) score += 10;
        else score -= 20;

        if (estimatedVRAM === 0) {
            // The model needs no VRAM, so the requirement is met on any machine.
            // Dividing here would give NaN on GPU-less hardware (0 / 0) and
            // silently award no points.
            score += 30;
        } else {
            const vramRatio = vram / estimatedVRAM;
            if (vramRatio >= 2) score += 30;
            else if (vramRatio >= 1) score += 20;
            else if (vramRatio >= 0.5) score += 10;
        }

        if (cores >= 8) score += 30;
        else if (cores >= 4) score += 20;
        else if (cores >= 2) score += 10;

        return Math.min(100, Math.max(0, score));
    }

    /**
     * Rate how specialised a model is for a category.
     *
     * 50 points for an exact `model.category` match, 10 per matching use case
     * (max 30) and 5 per keyword found in the name/identifier (max 20).
     *
     * @param {object} model - Model record.
     * @param {string} category - Category key from `this.categories`.
     * @returns {number} Score in 0-100.
     */
    calculateSpecializationScore(model, category) {
        const categoryInfo = this.categories[category];
        const keywords = categoryInfo ? categoryInfo.keywords : [];
        let score = 0;

        if (model.category === category) {
            score += 50;
        }

        if (Array.isArray(model.use_cases)) {
            const matchingUseCases = model.use_cases.filter(useCase => {
                const text = String(useCase).toLowerCase();
                return keywords.some(keyword => text.includes(keyword.toLowerCase()));
            });
            score += Math.min(30, matchingUseCases.length * 10);
        }

        const modelText = `${model.model_name} ${model.model_identifier}`.toLowerCase();
        const keywordMatches = keywords.filter(keyword =>
            modelText.includes(keyword.toLowerCase())
        );
        score += Math.min(20, keywordMatches.length * 5);

        return Math.min(100, score);
    }

    /**
     * Map a model's pull count onto a popularity score.
     *
     * @param {object} model - Model record; `pulls` defaults to 0.
     * @returns {number} 100, 80, 60, 40, 20 or 10 for counts strictly above
     *   10M, 1M, 100K, 10K, 1K, or anything lower.
     */
    calculatePopularityScore(model) {
        const pulls = model.pulls || 0;

        if (pulls > 10000000) return 100;
        if (pulls > 1000000) return 80;
        if (pulls > 100000) return 60;
        if (pulls > 10000) return 40;
        if (pulls > 1000) return 20;
        return 10;
    }

    /**
     * Rate how well a model's size suits the hardware tier.
     *
     * Weak tiers favour small models, strong tiers favour mid-to-large models
     * and the medium tier favours sizes between 3 and 13.
     *
     * @param {object} model - Model record.
     * @param {object} hardware - Hardware description.
     * @returns {number} Score between 20 and 100.
     */
    calculateEfficiencyScore(model, hardware) {
        const modelSize = this.extractModelSizeGB(model);
        const hardwareTier = this.calculateHardwareTier(hardware);

        if (hardwareTier === 'ultra_low' || hardwareTier === 'low') {
            if (modelSize <= 1) return 100;
            if (modelSize <= 3) return 80;
            if (modelSize <= 7) return 60;
            return 20;
        }

        if (hardwareTier === 'high' || hardwareTier === 'ultra_high') {
            if (modelSize >= 13 && modelSize <= 30) return 100;
            if (modelSize >= 7 && modelSize <= 13) return 90;
            if (modelSize >= 3 && modelSize <= 7) return 80;
            return 60;
        }

        // Medium tier.
        if (modelSize >= 3 && modelSize <= 13) return 100;
        if (modelSize <= 3) return 80;
        return 40;
    }

    /**
     * Subtract penalties for models the hardware cannot reasonably run.
     *
     * -30 when the estimated RAM requirement exceeds available RAM, and a
     * further -20 when a model larger than 30 is paired with a `low` or
     * `ultra_low` tier machine.
     *
     * @param {number} score - Score so far.
     * @param {object} model - Model record.
     * @param {object} hardware - Hardware description.
     * @returns {number} Adjusted score (not clamped).
     */
    applyPenalties(score, model, hardware) {
        const modelSize = this.extractModelSizeGB(model);
        const requiredRAM = this.estimateRAMRequirement(model);
        const { ram } = this.readHardwareSpecs(hardware);
        let adjusted = score;

        if (requiredRAM > ram) {
            adjusted -= 30;
        }

        // Both weak tiers are penalised; the weakest tier must not escape a
        // penalty that the next tier up receives.
        const tier = this.calculateHardwareTier(hardware);
        if (modelSize > 30 && (tier === 'low' || tier === 'ultra_low')) {
            adjusted -= 20;
        }

        return adjusted;
    }

    /**
     * Add bonuses and apply the category weight.
     *
     * +5 on Apple Silicon, +10 when `last_updated` is a string containing
     * `'day'` (e.g. "3 days ago"), +5 when quantizations are listed. The sum is
     * then multiplied by the category's `weight`.
     *
     * @param {number} score - Score so far.
     * @param {object} model - Model record.
     * @param {string} category - Category key from `this.categories`.
     * @param {object} hardware - Hardware description.
     * @returns {number} Adjusted score (not clamped).
     */
    applyBonuses(score, model, category, hardware) {
        let adjusted = score;

        const cpu = hardware && hardware.cpu;
        if (cpu && cpu.architecture === 'Apple Silicon') {
            adjusted += 5;
        }

        // Only strings can be searched; other types are ignored rather than
        // throwing on a missing `.includes`.
        if (typeof model.last_updated === 'string' && model.last_updated.includes('day')) {
            adjusted += 10;
        }

        if (model.quantizations && model.quantizations.length > 0) {
            adjusted += 5;
        }

        const categoryInfo = this.categories[category];
        const categoryWeight = categoryInfo ? categoryInfo.weight : 1;
        return adjusted * categoryWeight;
    }

    /**
     * Pick the top models while avoiding several variants of the same base.
     *
     * Only the `count * 2` highest-scoring models are considered. One model per
     * base name is taken first; if that leaves fewer than `count`, the gaps are
     * filled from the remaining candidates in score order.
     *
     * @param {object[]} scoredModels - Models carrying `categoryScore`; not mutated.
     * @param {string} hardwareTier - Currently unused; kept for interface compatibility.
     * @param {number} [count=3] - Maximum number of models to return.
     * @returns {object[]} Up to `count` models.
     */
    selectBestModels(scoredModels, hardwareTier, count = 3) {
        // Sort a copy: Array.prototype.sort reorders its receiver in place.
        const pool = [...scoredModels]
            .sort((a, b) => b.categoryScore - a.categoryScore)
            .slice(0, count * 2);

        const selected = [];
        const seenBases = new Set();

        for (const model of pool) {
            if (selected.length >= count) break;

            const baseName = this.extractBaseName(model.model_identifier);
            if (!seenBases.has(baseName)) {
                seenBases.add(baseName);
                selected.push(model);
            }
        }

        if (selected.length < count) {
            const remaining = pool.filter(model => !selected.includes(model));
            selected.push(...remaining.slice(0, count - selected.length));
        }

        return selected;
    }

    /**
     * Derive a model's base name: the text before the first `:` and, within
     * that, before the first `-` (e.g. "llama3.1:7b" -> "llama3.1").
     *
     * @param {string} identifier - Model identifier; a missing value yields ''.
     * @returns {string} Base name.
     */
    extractBaseName(identifier) {
        return String(identifier || '').split(':')[0].split('-')[0];
    }

    /**
     * Estimate the RAM a model needs: 1.2x its size, never less than 1.
     *
     * @param {object} model - Model record.
     * @returns {number} Estimated RAM requirement.
     */
    estimateRAMRequirement(model) {
        const size = this.extractModelSizeGB(model);
        return Math.max(1, size * 1.2);
    }

    /**
     * Estimate the VRAM a model needs.
     *
     * @param {object} model - Model record.
     * @returns {number} 0 for sizes up to 3 (treated as CPU-only capable),
     *   otherwise 0.6x the size with a floor of 2.
     */
    estimateVRAMRequirement(model) {
        const size = this.extractModelSizeGB(model);
        if (size <= 3) return 0;
        return Math.max(2, size * 0.6);
    }

    /**
     * Extract a size figure from the model's identifier, name, sizes or tags.
     *
     * The first number directly followed by `b`, `g` or `gb` (case-insensitive,
     * optional whitespace in between) wins, and is returned unchanged whichever
     * unit was matched. The unit must end the token, so "7b", "7B-instruct" and
     * "4.7GB" match while "Llama3 Gradient" and "4bit" do not.
     *
     * @param {object} model - Model record.
     * @returns {number} Parsed size, or 1 when nothing matches.
     */
    extractModelSizeGB(model) {
        const sizePattern = /(\d+(?:\.\d+)?)\s*(?:gb|g|b)(?![a-z0-9])/i;
        const sources = [
            model.model_identifier,
            model.model_name,
            ...(model.model_sizes || []),
            ...(model.tags || [])
        ];

        for (const source of sources) {
            // Skip missing fields and non-string entries (e.g. numeric sizes).
            if (typeof source !== 'string') continue;

            const sizeMatch = source.match(sizePattern);
            if (sizeMatch) {
                return parseFloat(sizeMatch[1]);
            }
        }

        return 1; // Default size when nothing could be parsed.
    }

    /**
     * Condense per-category recommendations into a summary.
     *
     * @param {object} recommendations - Result of `getBestModelsForHardware`.
     * @param {object} hardware - Hardware description.
     * @returns {object} `{ hardware_tier, total_categories, best_overall,
     *   by_category, quick_commands }`. `best_overall` stays `null` when no
     *   category's top model scores above 0.
     */
    generateRecommendationSummary(recommendations, hardware) {
        const summary = {
            hardware_tier: this.calculateHardwareTier(hardware),
            total_categories: Object.keys(recommendations).length,
            best_overall: null,
            by_category: {},
            quick_commands: []
        };

        let bestOverallScore = 0;
        let bestOverallModel = null;
        let bestOverallCategory;

        for (const [category, data] of Object.entries(recommendations)) {
            const bestModel = data.bestModels[0];
            if (!bestModel) continue;

            const command = `ollama pull ${bestModel.model_identifier}`;

            summary.by_category[category] = {
                name: bestModel.model_name,
                identifier: bestModel.model_identifier,
                score: Math.round(bestModel.categoryScore),
                command,
                size: `${this.extractModelSizeGB(bestModel)}B`,
                pulls: bestModel.pulls
            };
            summary.quick_commands.push(command);

            if (bestModel.categoryScore > bestOverallScore) {
                bestOverallScore = bestModel.categoryScore;
                bestOverallModel = bestModel;
                // Remember the category the winner was ranked first in rather
                // than searching for it afterwards.
                bestOverallCategory = category;
            }
        }

        if (bestOverallModel) {
            summary.best_overall = {
                name: bestOverallModel.model_name,
                identifier: bestOverallModel.model_identifier,
                category: bestOverallCategory,
                score: Math.round(bestOverallScore),
                command: `ollama pull ${bestOverallModel.model_identifier}`
            };
        }

        return summary;
    }
}
// CommonJS export: the class itself is the module's export value.
module.exports = IntelligentModelRecommender;