capsule-ai-cli
Version:
The AI Model Orchestrator - Intelligent multi-model workflows with device-locked licensing
105 lines • 3.76 kB
JavaScript
import { stateService } from '../../services/state.js';
import { providerRegistry } from '../../providers/base.js';
import { v4 as uuidv4 } from 'uuid';
import chalk from 'chalk';
/**
 * CLI command that benchmarks the currently selected provider/model pair.
 * Joins its arguments into a test prompt, falling back to a default haiku
 * prompt when invoked with no arguments.
 */
export const benchmarkCommand = {
    name: 'benchmark',
    description: 'Benchmark model performance',
    alias: ['bench'],
    async execute(args) {
        // Zero-argument invocations still produce a meaningful benchmark.
        const testPrompt = args.join(' ') || 'Write a haiku about artificial intelligence.';
        const activeProvider = stateService.getProvider();
        const activeModel = stateService.getModel();
        let outcome;
        try {
            outcome = await benchmarkModel(activeProvider, activeModel, testPrompt);
        }
        catch (error) {
            // A failed benchmark is reported inline as a zeroed row carrying
            // the error text; the command itself still completes.
            outcome = {
                model: activeModel,
                provider: activeProvider,
                firstTokenMs: 0,
                totalTimeMs: 0,
                tokensPerSecond: 0,
                promptTokens: 0,
                completionTokens: 0,
                cost: 0,
                error: error instanceof Error ? error.message : 'Unknown error'
            };
        }
        return {
            success: true,
            message: formatBenchmarkResults([outcome])
        };
    }
};
/**
 * Streams a single completion against `providerName`/`model` and measures
 * latency and throughput.
 *
 * @param {string} providerName - Registry key for the provider.
 * @param {string} model - Model identifier forwarded to the provider.
 * @param {string} prompt - Prompt sent as a single user message.
 * @returns {Promise<object>} Row with { model, provider, firstTokenMs,
 *   totalTimeMs, tokensPerSecond, promptTokens, completionTokens, cost }.
 * @throws {Error} If the provider is not registered (stream errors propagate).
 */
async function benchmarkModel(providerName, model, prompt) {
    const provider = providerRegistry.get(providerName);
    if (!provider) {
        throw new Error(`Provider ${providerName} not found`);
    }
    const startTime = Date.now();
    // null means "no token seen yet". The previous 0 sentinel conflated that
    // state with "first token arrived at 0ms elapsed", allowing later chunks
    // to overwrite the first-token timestamp.
    let firstTokenTime = null;
    let tokenCount = 0;
    const messages = [{
            id: uuidv4(),
            role: 'user',
            content: prompt,
            timestamp: new Date()
        }];
    const streamGenerator = provider.stream(messages, { model });
    let usage = null;
    for await (const chunk of streamGenerator) {
        if (chunk.delta) {
            if (firstTokenTime === null) {
                firstTokenTime = Date.now() - startTime;
            }
            tokenCount++;
        }
        if (chunk.usage) {
            usage = chunk.usage;
        }
    }
    const totalTime = Date.now() - startTime;
    const elapsedSeconds = totalTime / 1000;
    // Guard against a 0ms run, which previously yielded Infinity/NaN speeds.
    const tokensPerSec = (count) => (elapsedSeconds > 0 ? count / elapsedSeconds : 0);
    const base = {
        model,
        provider: providerName,
        firstTokenMs: firstTokenTime ?? 0,
        totalTimeMs: totalTime
    };
    if (usage) {
        // Provider-reported usage is authoritative: derive speed and cost from it.
        return {
            ...base,
            tokensPerSecond: tokensPerSec(usage.completionTokens),
            promptTokens: usage.promptTokens,
            completionTokens: usage.completionTokens,
            cost: provider.calculateCost(usage, model).amount
        };
    }
    // No usage chunk seen: approximate throughput from counted delta chunks.
    return {
        ...base,
        tokensPerSecond: tokensPerSec(tokenCount),
        promptTokens: 0,
        completionTokens: tokenCount,
        cost: 0
    };
}
/**
 * Renders benchmark rows into a colored, human-readable report.
 * Rows carrying an `error` field are printed as a single red line.
 *
 * @param {Array<object>} results - Rows produced by benchmarkModel.
 * @returns {string} Multi-line report string.
 */
function formatBenchmarkResults(results) {
    const lines = [chalk.bold('\nš Benchmark Results\n\n')];
    for (const entry of results) {
        if (entry.error) {
            lines.push(chalk.red(`ā ${entry.provider}/${entry.model}: ${entry.error}\n`));
            continue;
        }
        lines.push(chalk.cyan(`${entry.provider}/${entry.model}:\n`));
        lines.push(` ā±ļø First token: ${chalk.green(entry.firstTokenMs + 'ms')}\n`);
        lines.push(` ā±ļø Total time: ${chalk.green(entry.totalTimeMs + 'ms')}\n`);
        lines.push(` š Speed: ${chalk.green(entry.tokensPerSecond.toFixed(1) + ' tokens/s')}\n`);
        lines.push(` š Tokens: ${entry.promptTokens} ā ${entry.completionTokens}\n`);
        lines.push(` š° Cost: $${entry.cost.toFixed(6)}\n\n`);
    }
    return lines.join('');
}
//# sourceMappingURL=benchmark.js.map