/*
 * llm-checker
 * Version:
 * Intelligent CLI tool with AI-powered model selection that analyzes your
 * hardware and recommends optimal LLM models for your system.
 * 357 lines (305 loc) • 12.3 kB
 * JavaScript
 */
const OllamaClient = require('./client');
const { EventEmitter } = require('events');
/**
 * Coordinates model management against an Ollama instance through an
 * `OllamaClient`.
 *
 * Install and remove requests are pushed onto an in-memory FIFO queue and run
 * one at a time. Progress and outcomes are reported through events:
 * `initialized`, `error`, `taskStarted`, `taskCompleted`, `taskFailed`,
 * `cleanupCandidatesFound`, `cleanupSuggested`, `modelCleaned`, `cleanupError`.
 *
 * Model records returned by the client are assumed to carry `name`,
 * `modified`, `fileSizeGB`, `quantization` and `family` fields; that shape is
 * defined by the client, not here — confirm against `./client`.
 */
class OllamaManager extends EventEmitter {
    /**
     * @param {Object} [options]
     * @param {string} [options.baseURL] - Passed straight to the `OllamaClient` constructor.
     * @param {number} [options.maxConcurrent=1] - Stored on the instance.
     *   NOTE(review): the queue below always runs a single task at a time and
     *   never reads this value.
     * @param {boolean} [options.autoCleanup=true] - Pass `false` to skip the
     *   periodic cleanup analysis timer.
     * @param {number} [options.cleanupInterval=1800000] - Milliseconds between
     *   cleanup analyses.
     */
    constructor(options = {}) {
        super();
        this.client = new OllamaClient(options.baseURL);
        this.modelQueue = [];
        this.isProcessing = false;
        this.maxConcurrent = options.maxConcurrent || 1;
        // `options.autoCleanup || true` evaluated to true for every input, so
        // the timer could never be disabled. Only an explicit `false` opts out.
        this.autoCleanup = options.autoCleanup !== false;
        this.cleanupInterval = options.cleanupInterval || 30 * 60 * 1000; // 30 minutes
        this.cleanupTimer = null;
        if (this.autoCleanup) {
            this.startCleanupTimer();
        }
    }

    /**
     * Checks that Ollama is reachable via the client.
     *
     * Emits `initialized` with the status on success and `error` on failure.
     *
     * @returns {Promise<Object>} The status object returned by the client.
     * @throws {Error} When the client reports `available` as falsy or the check itself fails.
     */
    async initializeManager() {
        try {
            const status = await this.client.checkOllamaAvailability();
            if (!status.available) {
                throw new Error(`Ollama not available: ${status.error}`);
            }
            this.emit('initialized', status);
            return status;
        } catch (error) {
            this.emit('error', error);
            throw error;
        }
    }

    /**
     * Queues a model pull.
     *
     * @param {string} modelName
     * @param {Object} [options]
     * @param {Function} [options.onProgress] - Forwarded to `client.pullModel` as its second argument.
     * @returns {Promise<*>} Settles with whatever `client.pullModel` resolves or rejects with.
     */
    installModel(modelName, options = {}) {
        return this.enqueueTask({
            type: 'install',
            modelName,
            options,
            progress: options.onProgress || (() => {})
        });
    }

    /**
     * Queues a model deletion.
     *
     * @param {string} modelName
     * @returns {Promise<*>} Settles with whatever `client.deleteModel` resolves or rejects with.
     */
    removeModel(modelName) {
        return this.enqueueTask({ type: 'remove', modelName });
    }

    /**
     * Re-pulls a model.
     *
     * NOTE(review): `force: true` is recorded on the task but `processQueue`
     * only forwards the model name and progress callback to the client, so the
     * flag currently has no effect.
     *
     * @param {string} modelName
     * @returns {Promise<*>}
     */
    updateModel(modelName) {
        // Update is essentially a re-pull
        return this.installModel(modelName, { force: true });
    }

    /**
     * Appends a task to the queue and kicks the processor.
     *
     * @param {Object} task - Task fields; `resolve`/`reject` are attached here.
     * @returns {Promise<*>} Promise settled when the task runs (or the manager is destroyed).
     */
    enqueueTask(task) {
        return new Promise((resolve, reject) => {
            this.modelQueue.push({ ...task, resolve, reject });
            this.processQueue();
        });
    }

    /**
     * Runs the next queued task, if none is already running.
     *
     * Emits `taskStarted`, then `taskCompleted` or `taskFailed`. When more
     * tasks remain, the next run is scheduled after a one second pause.
     *
     * @returns {Promise<void>}
     */
    async processQueue() {
        if (this.isProcessing || this.modelQueue.length === 0) {
            return;
        }
        this.isProcessing = true;
        const task = this.modelQueue.shift();
        const { type, modelName } = task;
        try {
            this.emit('taskStarted', { type, model: modelName });
            let result;
            if (type === 'install') {
                result = await this.client.pullModel(modelName, task.progress);
            } else if (type === 'remove') {
                result = await this.client.deleteModel(modelName);
            }
            this.emit('taskCompleted', { type, model: modelName, result });
            task.resolve(result);
        } catch (error) {
            // Settle the caller's promise first so a throwing `taskFailed`
            // listener cannot leave it pending forever.
            task.reject(error);
            this.emit('taskFailed', { type, model: modelName, error });
        } finally {
            // Always release the queue, even if a listener threw above;
            // otherwise every later task would be stuck behind this flag.
            this.isProcessing = false;
            // Process next task
            if (this.modelQueue.length > 0) {
                setTimeout(() => this.processQueue(), 1000);
            }
        }
    }

    /**
     * Reports whether a single model is installed and/or running.
     *
     * @param {string} modelName
     * @returns {Promise<{installed: boolean, running: boolean, details: (Object|null), runtime: (Object|null)}>}
     * @throws {Error} Wrapping any client failure.
     */
    async getModelStatus(modelName) {
        try {
            // The two lookups are independent, so fetch them together.
            const [localModels, runningModels] = await Promise.all([
                this.client.getLocalModels(),
                this.client.getRunningModels()
            ]);
            const matchesName = (m) => m.name === modelName;
            const localModel = localModels.find(matchesName);
            const runningModel = runningModels.find(matchesName);
            return {
                installed: !!localModel,
                running: !!runningModel,
                details: localModel || null,
                runtime: runningModel || null
            };
        } catch (error) {
            throw new Error(`Failed to get model status: ${error.message}`, { cause: error });
        }
    }

    /**
     * Lists every local model annotated with its running state.
     *
     * @returns {Promise<Object[]>} Each local model record plus `running`
     *   (boolean) and `runtime` (the matching running-model record or `null`).
     * @throws {Error} Wrapping any client failure.
     */
    async getAllModelsStatus() {
        try {
            const [localModels, runningModels] = await Promise.all([
                this.client.getLocalModels(),
                this.client.getRunningModels()
            ]);
            // Index once instead of scanning the running list per local model.
            // The first entry per name wins, matching Array.prototype.find.
            const runtimeByName = new Map();
            for (const runtime of runningModels) {
                if (!runtimeByName.has(runtime.name)) {
                    runtimeByName.set(runtime.name, runtime);
                }
            }
            return localModels.map(model => ({
                ...model,
                running: runtimeByName.has(model.name),
                runtime: runtimeByName.get(model.name) || null
            }));
        } catch (error) {
            throw new Error(`Failed to get models status: ${error.message}`, { cause: error });
        }
    }

    /**
     * Builds quantization recommendations for all local models.
     *
     * @param {Object} hardware - Must expose `memory.total`; see `analyzeModelOptimization`.
     * @returns {Promise<{totalModels: number, optimizable: number, recommendations: Object[], estimatedSavings: Object}>}
     * @throws {Error} Wrapping any failure while gathering or analysing models.
     */
    async optimizeModels(hardware) {
        try {
            const modelsStatus = await this.getAllModelsStatus();
            const recommendations = [];
            // Find models that could be optimized
            for (const model of modelsStatus) {
                const analysis = await this.analyzeModelOptimization(model, hardware);
                if (analysis.canOptimize) {
                    recommendations.push(analysis);
                }
            }
            return {
                totalModels: modelsStatus.length,
                optimizable: recommendations.length,
                recommendations,
                estimatedSavings: this.calculateSavings(recommendations)
            };
        } catch (error) {
            throw new Error(`Failed to optimize models: ${error.message}`, { cause: error });
        }
    }

    /**
     * Suggests a different quantization for one model based on total memory.
     *
     * Rules: `memory.total >= 32` with `Q4_0` suggests `Q5_K_M`;
     * `memory.total <= 8` with `Q8_0` suggests `Q4_K_M`.
     * `memory.total` is presumably in GB — confirm against the hardware detector.
     *
     * NOTE(review): the estimated new size is always 80% of the current size,
     * including the Q4_0 -> Q5_K_M case where a higher-bit quantization would
     * be expected to grow the file. Left unchanged because the intended
     * factors are not defined in this file.
     *
     * @param {Object} model - Record with `name`, `quantization`, `fileSizeGB`.
     * @param {Object} hardware - Object with `memory.total`.
     * @returns {Promise<Object>} Analysis record including `canOptimize`.
     */
    async analyzeModelOptimization(model, hardware) {
        const currentQuant = model.quantization || 'Unknown';
        const modelSizeGB = model.fileSizeGB;
        // Suggest better quantization
        let suggestedQuant = currentQuant;
        let canOptimize = false;
        if (hardware.memory.total >= 32 && currentQuant === 'Q4_0') {
            suggestedQuant = 'Q5_K_M';
            canOptimize = true;
        } else if (hardware.memory.total <= 8 && currentQuant === 'Q8_0') {
            suggestedQuant = 'Q4_K_M';
            canOptimize = true;
        }
        return {
            model: model.name,
            currentQuantization: currentQuant,
            suggestedQuantization: suggestedQuant,
            currentSize: modelSizeGB,
            estimatedNewSize: canOptimize ? modelSizeGB * 0.8 : modelSizeGB,
            canOptimize,
            reason: canOptimize ?
                `Better quantization available for your hardware` :
                'Current quantization is optimal'
        };
    }

    /**
     * Totals current vs. estimated sizes across recommendations.
     *
     * @param {Array<{currentSize: number, estimatedNewSize: number}>} recommendations
     * @returns {{currentSize: number, newSize: number, saved: number, percentage: number}}
     *   Sizes rounded to one decimal; `percentage` is a whole number and is 0
     *   when there is nothing to compare against.
     */
    calculateSavings(recommendations) {
        const totalCurrentSize = recommendations.reduce((sum, r) => sum + r.currentSize, 0);
        const totalNewSize = recommendations.reduce((sum, r) => sum + r.estimatedNewSize, 0);
        const savedSize = totalCurrentSize - totalNewSize;
        const roundToTenth = (value) => Math.round(value * 10) / 10;
        return {
            currentSize: roundToTenth(totalCurrentSize),
            newSize: roundToTenth(totalNewSize),
            saved: roundToTenth(savedSize),
            // Guard the division: an empty recommendation list used to yield NaN.
            percentage: totalCurrentSize > 0 ?
                Math.round((savedSize / totalCurrentSize) * 100) :
                0
        };
    }

    /**
     * Finds local models that are not running and whose `modified` timestamp
     * is more than 30 days old. Nothing is deleted here; see `performCleanup`.
     *
     * Emits `cleanupCandidatesFound` with the candidate records.
     *
     * @returns {Promise<{totalModels: number, runningModels: number, cleanupCandidates: number, candidates: Array<{name: string, size: number, lastUsed: *}>}>}
     * @throws {Error} Wrapping any client failure.
     */
    async cleanupUnusedModels() {
        try {
            const [runningModels, allModels] = await Promise.all([
                this.client.getRunningModels(),
                this.client.getLocalModels()
            ]);
            const MS_PER_DAY = 1000 * 60 * 60 * 24;
            const runningNames = new Set(runningModels.map(r => r.name));
            const now = Date.now();
            // `modified` is used as the staleness signal; an unparseable date
            // yields NaN, which fails the comparison and keeps the model.
            const candidates = allModels.filter(model => {
                const daysSinceModified = (now - new Date(model.modified).getTime()) / MS_PER_DAY;
                return !runningNames.has(model.name) && daysSinceModified > 30;
            });
            this.emit('cleanupCandidatesFound', candidates);
            return {
                totalModels: allModels.length,
                runningModels: runningModels.length,
                cleanupCandidates: candidates.length,
                candidates: candidates.map(m => ({
                    name: m.name,
                    size: m.fileSizeGB,
                    lastUsed: m.modified
                }))
            };
        } catch (error) {
            throw new Error(`Failed to cleanup analysis: ${error.message}`, { cause: error });
        }
    }

    /**
     * Removes the given models one after another through the task queue.
     *
     * Emits `modelCleaned` per success and `cleanupError` per failure; a
     * failure does not stop the remaining removals.
     *
     * @param {string[]} modelNames
     * @returns {Promise<Array<{model: string, success: boolean, error: (string|undefined)}>>}
     */
    async performCleanup(modelNames) {
        const results = [];
        for (const modelName of modelNames) {
            try {
                await this.removeModel(modelName);
                results.push({ model: modelName, success: true });
                this.emit('modelCleaned', modelName);
            } catch (error) {
                results.push({ model: modelName, success: false, error: error.message });
                this.emit('cleanupError', { model: modelName, error });
            }
        }
        return results;
    }

    /**
     * Runs each prompt through `client.testModelPerformance` and averages the
     * results flagged `success`.
     *
     * @param {string} modelName
     * @param {Object} [options]
     * @param {string[]} [options.prompts] - Overrides the three built-in prompts.
     * @returns {Promise<Object>} Summary with per-prompt `detailedResults`;
     *   averages are 0 when no test succeeded.
     */
    async benchmarkModel(modelName, options = {}) {
        const testPrompts = options.prompts || [
            "Hello, how are you?",
            "Explain quantum computing in simple terms.",
            "Write a short Python function to sort a list."
        ];
        // Keep at most 50 characters of the prompt in the report.
        const previewOf = (prompt) =>
            prompt.substring(0, 50) + (prompt.length > 50 ? '...' : '');
        const results = [];
        for (const prompt of testPrompts) {
            try {
                const result = await this.client.testModelPerformance(modelName, prompt);
                results.push({ prompt: previewOf(prompt), ...result });
            } catch (error) {
                results.push({
                    prompt: previewOf(prompt),
                    success: false,
                    error: error.message
                });
            }
        }
        // Calculate averages
        const successful = results.filter(r => r.success);
        const averageOf = (field) => (successful.length > 0 ?
            successful.reduce((sum, r) => sum + r[field], 0) / successful.length :
            0);
        return {
            model: modelName,
            testCount: testPrompts.length,
            successfulTests: successful.length,
            failedTests: results.length - successful.length,
            averageTokensPerSecond: Math.round(averageOf('tokensPerSecond') * 10) / 10,
            averageResponseTime: Math.round(averageOf('responseTime')),
            detailedResults: results
        };
    }

    /**
     * Starts the periodic cleanup analysis, replacing any timer already running.
     *
     * Each tick runs `cleanupUnusedModels` and emits `cleanupSuggested` when
     * candidates exist.
     */
    startCleanupTimer() {
        this.stopCleanupTimer();
        this.cleanupTimer = setInterval(async () => {
            try {
                const analysis = await this.cleanupUnusedModels();
                if (analysis.cleanupCandidates > 0) {
                    this.emit('cleanupSuggested', analysis);
                }
            } catch (error) {
                // EventEmitter throws on an unhandled 'error' event; inside
                // this timer callback that would surface as an unhandled
                // rejection, so only emit when someone is listening.
                if (this.listenerCount('error') > 0) {
                    this.emit('error', error);
                }
            }
        }, this.cleanupInterval);
        // A background housekeeping timer should not keep the process alive.
        if (typeof this.cleanupTimer.unref === 'function') {
            this.cleanupTimer.unref();
        }
    }

    /**
     * Stops the periodic cleanup analysis if it is running.
     */
    stopCleanupTimer() {
        if (this.cleanupTimer) {
            clearInterval(this.cleanupTimer);
            this.cleanupTimer = null;
        }
    }

    /**
     * Summarises local models and queue state.
     *
     * @returns {Promise<Object>} Counts, total/average size in GB (one
     *   decimal), per-quantization and per-family counts, queue length and
     *   the processing flag.
     * @throws {Error} Wrapping any client failure.
     */
    async getStatistics() {
        try {
            const [localModels, runningModels] = await Promise.all([
                this.client.getLocalModels(),
                this.client.getRunningModels()
            ]);
            const totalSize = localModels.reduce((sum, m) => sum + m.fileSizeGB, 0);
            const avgSize = localModels.length > 0 ? totalSize / localModels.length : 0;
            // Tally models by a field, bucketing missing values as 'Unknown'.
            const countBy = (field) => localModels.reduce((counts, model) => {
                const bucket = model[field] || 'Unknown';
                counts[bucket] = (counts[bucket] || 0) + 1;
                return counts;
            }, {});
            return {
                total: localModels.length,
                running: runningModels.length,
                totalSizeGB: Math.round(totalSize * 10) / 10,
                averageSizeGB: Math.round(avgSize * 10) / 10,
                quantizationBreakdown: countBy('quantization'),
                familyBreakdown: countBy('family'),
                queueLength: this.modelQueue.length,
                isProcessing: this.isProcessing
            };
        } catch (error) {
            throw new Error(`Failed to get statistics: ${error.message}`, { cause: error });
        }
    }

    /**
     * Releases resources: stops the cleanup timer, rejects tasks that are
     * still queued, removes all listeners and resets the processing flag.
     * A task already handed to the client is not interrupted.
     */
    destroy() {
        this.stopCleanupTimer();
        // Settle queued promises so awaiting callers do not hang forever.
        const abandoned = this.modelQueue;
        this.modelQueue = [];
        for (const task of abandoned) {
            task.reject(new Error(
                `OllamaManager destroyed before ${task.type} of ${task.modelName} could run`
            ));
        }
        this.removeAllListeners();
        this.isProcessing = false;
    }
}
// CommonJS export: the module's sole export is the OllamaManager class.
module.exports = OllamaManager;