// llm-info
// Information on LLM models, context window token limit, output token limit,
// pricing and more.
import OpenAI from 'openai';
import dotenv from 'dotenv';
import fs from 'fs';
import path from 'path';
dotenv.config();
// Common OpenAI client config with timeout
const clientConfig = {
  timeout: 10000, // 10 seconds timeout
};

// Builds an OpenAI-compatible client for a provider endpoint, sharing the
// common client config (timeout) across all providers.
const makeClient = (baseURL, apiKey) =>
  new OpenAI({ baseURL, apiKey, ...clientConfig });

// Registry of DeepSeek R1 providers to benchmark. Each entry carries the
// OpenAI-compatible client, the provider-specific model identifier, and a
// `skip` flag to exclude it from a run.
const providers = {
  deepseek: {
    name: 'DeepSeek',
    client: makeClient('https://api.deepseek.com', process.env.DEEPSEEK_API_KEY),
    model: 'deepseek-reasoner',
    skip: false,
  },
  deepinfra: {
    name: 'DeepInfra',
    client: makeClient(
      'https://api.deepinfra.com/v1/openai',
      process.env.DEEPINFRA_TOKEN
    ),
    model: 'deepseek-ai/DeepSeek-R1',
    skip: false,
  },
  fireworks: {
    name: 'Fireworks',
    client: makeClient(
      'https://api.fireworks.ai/inference/v1',
      process.env.FIREWORKS_API_KEY
    ),
    model: 'accounts/fireworks/models/deepseek-r1',
    skip: false,
  },
  together: {
    name: 'Together',
    client: makeClient(
      'https://api.together.xyz/v1',
      process.env.TOGETHER_API_KEY
    ),
    model: 'deepseek-ai/DeepSeek-R1',
    skip: false,
  },
  chutes: {
    name: 'Chutes',
    client: makeClient(
      'https://chutes-deepseek-ai-deepseek-r1.chutes.ai/v1',
      process.env.CHUTES_API_KEY
    ),
    model: 'deepseek-ai/DeepSeek-R1',
    skip: true, // requires TAO balance
  },
  hyperbolic: {
    name: 'Hyperbolic',
    client: makeClient(
      'https://api.hyperbolic.xyz/v1',
      process.env.HYPERBOLIC_API_KEY
    ),
    model: 'deepseek-ai/DeepSeek-R1',
    skip: false,
  },
  azure: {
    name: 'Azure',
    longName: 'Azure AI Foundry',
    client: makeClient(
      process.env.AZURE_AI_FOUNDRY_ENDPOINT,
      process.env.AZURE_AI_FOUNDRY_API_KEY
    ),
    model: 'random-string', // does not matter since URL is already model specific
    skip: false,
  },
  nebius: {
    name: 'Nebius',
    client: makeClient(
      'https://api.studio.nebius.ai/v1/',
      process.env.NEBIUS_API_KEY
    ),
    model: 'deepseek-ai/DeepSeek-R1',
    skip: false,
  },
  nvidia: {
    name: 'Nvidia',
    client: makeClient(
      'https://integrate.api.nvidia.com/v1',
      process.env.NVIDIA_API_KEY
    ),
    model: 'deepseek-ai/deepseek-r1',
    // Nvidia is stuck at streaming without timeout, so we skip it for now
    skip: true,
  },
  kluster: {
    name: 'Kluster',
    client: makeClient('https://api.kluster.ai/v1', process.env.KLUSTER_API_KEY),
    model: 'deepseek-ai/DeepSeek-R1',
    skip: false,
  },
  novita: {
    name: 'Novita',
    client: makeClient(
      'https://api.novita.ai/v3/openai',
      process.env.NOVITA_API_KEY
    ),
    // Novita recommends deepseek-r1-turbo for better performance and value
    model: 'deepseek/deepseek-r1-turbo',
    skip: false,
  },
};
// List of countries in Europe to test, excluding France
const countries = [
  'Germany',
  'Italy',
  'Spain',
  'Portugal',
  'Greece',
  'Netherlands',
  'Belgium',
  'Switzerland',
  'Austria',
  'Sweden',
  'Norway',
];
// Pick a random country each run so repeated runs vary the prompt.
const randomCountry = countries[Math.floor(Math.random() * countries.length)];
const testPrompt = `What is the capital of ${randomCountry}?`;
/**
 * Streams a single chat completion from `provider` and measures throughput.
 *
 * @param {{name: string, model: string, client: object}} provider - entry from `providers`
 * @param {boolean} [showOutput=false] - when true, stream the model's text to
 *   stdout live instead of printing a character-count progress line
 * @returns {Promise<object>} benchmark stats (tokens, latency, tokens/s) on
 *   success, or `{ name, error, ... }` with null metrics on failure/timeout
 */
async function measureSpeed(provider, showOutput = false) {
  const startTime = Date.now();
  let content = '';
  let usage = null;
  let firstResponseTime = null;
  try {
    console.log(`\nStarting ${provider.name} API speed benchmark...`);
    const stream = await provider.client.chat.completions.create({
      messages: [{ role: 'user', content: testPrompt }],
      model: provider.model,
      stream: true,
      stream_options: {
        include_usage: true, // ask for token usage in the final chunk
      },
    });
    process.stdout.write('\rReceiving response...');
    for await (const chunk of stream) {
      // Record time of first response if not already set
      if (firstResponseTime === null) {
        firstResponseTime = (Date.now() - startTime) / 1000;
      }
      const delta = chunk.choices[0]?.delta;
      if (
        delta?.content !== undefined ||
        delta?.reasoning_content !== undefined
      ) {
        // Fall back through reasoning_content (R1 thinking phase sends
        // content: null) and finally to '' — the original code appended the
        // raw value, which turned undefined/null deltas into the literal
        // strings "undefined"/"null" in `content`.
        const contentDelta = delta.content || delta.reasoning_content || '';
        content += contentDelta;
        if (showOutput) {
          process.stdout.write(contentDelta);
        } else {
          process.stdout.write(
            '\rReceiving response... ' + content.length + ' chars'
          );
        }
        if (chunk.usage) {
          usage = chunk.usage;
        }
      } else if (chunk.usage) {
        // Usage-only chunk (typically the last one when include_usage is set).
        usage = chunk.usage;
      } else if (chunk.choices[0]?.finish_reason !== null) {
        console.log('\n\nResponse finished');
        if (chunk.usage) {
          usage = chunk.usage;
        }
      } else {
        console.log('unknown chunk');
        console.log(chunk.choices[0].delta);
      }
    }
    const endTime = Date.now();
    const responseTime = (endTime - startTime) / 1000; // Convert to seconds
    if (!usage) {
      throw new Error('Could not get token usage from response');
    }
    const tokensPerSecond = usage.completion_tokens / responseTime;
    console.log(
      `\n\n${provider.name}: Total tokens: ${
        usage.total_tokens
      }, Prompt tokens: ${usage.prompt_tokens}, Completion tokens: ${
        usage.completion_tokens
      }, Response time: ${responseTime.toFixed(
        2
      )}s, First response latency: ${firstResponseTime.toFixed(
        2
      )}s, Speed: ${tokensPerSecond.toFixed(2)} tokens/s, Response length: ${
        content.length
      } chars`
    );
    return {
      name: provider.name,
      totalTokens: usage.total_tokens,
      promptTokens: usage.prompt_tokens,
      completionTokens: usage.completion_tokens,
      responseTime: responseTime.toFixed(2),
      firstResponseLatency: firstResponseTime.toFixed(2),
      speed: tokensPerSecond.toFixed(2),
      responseLength: content.length,
    };
  } catch (error) {
    if (
      error.code === 'ETIMEDOUT' ||
      error.name === 'AbortError' ||
      error.message?.includes('timeout')
    ) {
      console.error(`\n${provider.name} timed out after 10 seconds`);
    } else {
      console.error(`\nError during ${provider.name} benchmark:`, error);
    }
    // Keep the error shape consistent with the success shape (all metric
    // fields present, null) so downstream reporting needs no special-casing.
    return {
      name: provider.name,
      error: error.message,
      speed: null,
      totalTokens: null,
      promptTokens: null,
      completionTokens: null,
      responseTime: null,
      firstResponseLatency: null,
      responseLength: null,
    };
  }
}
// Parse command line arguments: --show-output streams the model's text live.
const showOutput = process.argv.some((arg) => arg === '--show-output');
// Run benchmarks for all providers
/**
 * Benchmarks every non-skipped provider sequentially, then writes a
 * timestamped JSON report and a human-readable text report into `outputs/`.
 * Errored providers are reported with null/`N/A` metrics and sorted last.
 */
async function runAllBenchmarks() {
  console.log('Running benchmarks...');
  console.log('showOutput:', showOutput);
  console.log('testPrompt:', testPrompt);
  const results = [];
  const errors = {};
  for (const provider of Object.values(providers)) {
    if (provider.skip) {
      console.log(`\nSkipping ${provider.name} as configured...`);
      continue;
    }
    // Sequential on purpose: one provider at a time keeps the measured
    // throughput free of local contention.
    const result = await measureSpeed(provider, showOutput);
    if (result.error) {
      errors[provider.name] = result.error;
    } else {
      results.push(result);
    }
  }
  const timestamp = new Date().toISOString();
  // Make sure the output directory exists so the writes below cannot fail
  // on a fresh checkout.
  fs.mkdirSync('outputs', { recursive: true });
  // Placeholder detail rows for providers that errored out.
  const errorDetails = Object.entries(errors).map(([name, error]) => ({
    name,
    error,
    speed: null,
    totalTokens: null,
    promptTokens: null,
    completionTokens: null,
    responseTime: null,
    firstResponseLatency: null,
    responseLength: null,
  }));
  // Create JSON output
  const jsonData = {
    timestamp,
    results: Object.fromEntries([
      ...results.map((result) => [result.name, parseFloat(result.speed)]),
      ...Object.keys(errors).map((name) => [name, null]), // Add null for errored providers
    ]),
    details: [...results, ...errorDetails],
  };
  // Write JSON file
  const jsonOutputPath = path.join('outputs', `${timestamp}.json`);
  fs.writeFileSync(jsonOutputPath, JSON.stringify(jsonData, null, 2));
  console.log(`\nJSON results written to ${jsonOutputPath}`);
  // Create text output - only the final results
  let output = `=== Final Benchmark Results ===\n`;
  output += `Current time: ${timestamp}\n`;
  output += `Test prompt: ${testPrompt}\n\n`;
  // Sort results by speed in descending order, putting errors at the end
  const allResults = [
    ...results,
    ...Object.entries(errors).map(([name, error]) => ({
      name,
      speed: '0',
      totalTokens: 'N/A',
      promptTokens: 'N/A',
      completionTokens: 'N/A',
      responseTime: 'N/A',
      firstResponseLatency: 'N/A',
      responseLength: 'N/A',
      error,
    })),
  ];
  allResults.sort((a, b) => {
    if (a.error && !b.error) return 1;
    if (!a.error && b.error) return -1;
    return parseFloat(b.speed) - parseFloat(a.speed);
  });
  allResults.forEach((result) => {
    if (result.error) {
      output += `${result.name.padEnd(10)}: ERROR - ${result.error}\n`;
    } else {
      output += `${result.name.padEnd(10)}: Speed: ${
        result.speed
      } tokens/s, Total: ${result.totalTokens} tokens, Prompt: ${
        result.promptTokens
      } tokens, Completion: ${result.completionTokens} tokens, Time: ${
        result.responseTime
      }s, Latency: ${result.firstResponseLatency}s, Length: ${
        result.responseLength
      } chars\n`;
    }
  });
  // Write text file
  const txtOutputPath = path.join('outputs', `${timestamp}.txt`);
  fs.writeFileSync(txtOutputPath, output);
  console.log(`Text results written to ${txtOutputPath}`);
  // Print final results to console as well
  console.log(output);
}
// Entry point. Attach a rejection handler so a failure surfaces as an error
// message and a non-zero exit code instead of an unhandled promise rejection.
runAllBenchmarks().catch((error) => {
  console.error('Benchmark run failed:', error);
  process.exitCode = 1;
});