/*
 * @just-every/ensemble
 * Version: (unspecified)
 * LLM provider abstraction layer with unified streaming interface
 * 1,550 lines • 46.9 kB
 * JavaScript
 */
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.MODEL_REGISTRY = exports.MODEL_CLASSES = void 0;
exports.findModel = findModel;
const external_models_js_1 = require("../utils/external_models.cjs");
// Named pools of candidate model ids, one pool per kind of work.
// Ids may carry reasoning-effort suffixes ("-low"/"-medium"/"-high"/"-max"),
// which findModel() strips to resolve the base registry entry.
// NOTE(review): `random: true` presumably allows the consumer to pick any
// entry from the pool rather than the first — behavior is defined by the
// code that reads this table; confirm against the caller.
exports.MODEL_CLASSES = {
    // General-purpose chat/completion models.
    standard: {
        models: [
            'gpt-4.1',
            'gemini-2.5-flash-preview-05-20-low',
            'claude-3-5-haiku-latest',
            'grok-3-mini-fast',
            'deepseek-chat',
        ],
        random: true,
    },
    // Small, cheap, fast models for lightweight tasks.
    mini: {
        models: [
            'gpt-4.1-nano',
            'claude-3-5-haiku-latest',
            'gemini-2.5-flash-lite-preview-06-17',
            'grok-3-mini',
            'meta-llama/llama-4-scout',
            'mistral/ministral-8b',
        ],
        random: true,
    },
    // High-capability reasoning models (extended thinking / chain of thought).
    reasoning: {
        models: [
            'gemini-2.5-pro-preview-06-05',
            'gemini-2.5-flash-preview-05-20-max',
            'o4-mini-high',
            'o3-pro',
            'o3-high',
            'claude-opus-4-20250514-max',
            'claude-sonnet-4-20250514-max',
            'claude-3-7-sonnet-latest-max',
            'mistralai/magistral-medium-2506:thinking',
        ],
        random: true,
    },
    // Cheaper reasoning models for less demanding multi-step tasks.
    reasoning_mini: {
        models: [
            'gemini-2.5-flash-preview-05-20-medium',
            'grok-3-mini-fast',
            'o3-low',
        ],
        random: true,
    },
    // Models used for internal-monologue style generation.
    monologue: {
        models: [
            'gemini-2.5-pro-preview-06-05',
            'gemini-2.5-flash-preview-05-20-medium',
            'o4-mini-low',
            'o3-medium',
            'claude-sonnet-4-20250514-medium',
        ],
        random: true,
    },
    // Models used for self-evaluation / meta-reasoning steps.
    metacognition: {
        models: [
            'gemini-2.5-pro-preview-06-05',
            'gemini-2.5-flash-preview-05-20-high',
            'o3-high',
            'claude-sonnet-4-20250514-medium',
            'grok-3-mini-fast',
        ],
        random: true,
    },
    // Code-generation and code-review models.
    code: {
        models: [
            'codex-mini-latest',
            'claude-opus-4-20250514-medium',
            'claude-sonnet-4-20250514-max',
            'o3-high',
            'gemini-2.5-flash-preview-05-20-medium',
        ],
        random: true,
    },
    // Prose / creative-writing models.
    writing: {
        models: [
            'gemini-2.5-flash-lite-preview-06-17',
            'gpt-4.1-mini',
        ],
        random: true,
    },
    // Summarization models (same cheap pool as `writing`).
    summary: {
        models: [
            'gemini-2.5-flash-lite-preview-06-17',
            'gpt-4.1-mini',
        ],
        random: true,
    },
    // Image-understanding models.
    vision: {
        models: [
            'o4-mini-medium',
            'o3-medium',
            'gemini-2.5-flash-preview-05-20-max',
            'gemini-2.5-pro-preview-06-05',
            'claude-opus-4-20250514-low',
            'claude-sonnet-4-20250514-max',
        ],
        random: true,
    },
    // Cheaper image-understanding models.
    vision_mini: {
        models: [
            'gpt-4.1',
            'o3-low',
            'o4-mini-low',
            'gemini-2.5-flash-lite-preview-06-17',
            'gemini-2.5-flash-preview-05-20',
            'claude-sonnet-4-20250514-low',
        ],
        random: true,
    },
    // Models suited to web-search / research-style queries.
    search: {
        models: [
            'gpt-4.1',
            'deepseek-reasoner',
            'gemini-2.5-flash-lite-preview-06-17',
            'perplexity/sonar-deep-research',
        ],
        random: true,
    },
    long: {
        models: [
            'gpt-4.1',
            'gpt-4.1-nano',
            'gpt-4.1-mini',
            'gemini-2.5-pro-preview-06-05',
            'gemini-2.5-flash-preview-05-20-medium',
            'gemini-2.5-flash-preview-05-20-low',
            'gemini-2.5-flash-lite-preview-06-17',
        ],
        random: true,
        description: 'Models with very large context windows (near 1M tokens) for processing long documents',
    },
    // Text-to-image models (no `random` flag: selection order is significant
    // or left to the consumer — confirm against caller).
    image_generation: {
        models: [
            'gpt-image-1',
            'imagen-3.0-generate-002',
        ],
    },
    embedding: {
        models: [
            'text-embedding-3-small',
            'gemini-embedding-exp-03-07',
        ],
        description: 'Vector embedding models for semantic search and RAG',
    },
    voice: {
        models: [
            'gpt-4o-mini-tts',
            'tts-1',
            'tts-1-hd',
            'eleven_multilingual_v2',
            'eleven_turbo_v2_5',
            'eleven_flash_v2_5',
            'gemini-2.5-flash-preview-tts',
            'gemini-2.5-pro-preview-tts',
        ],
        description: 'Text-to-Speech models for voice generation',
    },
    transcription: {
        models: [
            'gemini-2.0-flash-live-001',
        ],
        description: 'Speech-to-Text models for audio transcription with real-time streaming',
    },
};
exports.MODEL_REGISTRY = [
{
id: 'text-embedding-3-small',
provider: 'openai',
cost: {
input_per_million: 0.02,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['embedding'],
input_token_limit: 8191,
},
embedding: true,
dim: 1536,
class: 'embedding',
description: "OpenAI's small embedding model, good balance of performance and cost",
},
{
id: 'text-embedding-3-large',
provider: 'openai',
cost: {
input_per_million: 0.13,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['embedding'],
input_token_limit: 8191,
},
embedding: true,
dim: 3072,
class: 'embedding',
description: "OpenAI's large embedding model, good balance of performance and cost",
},
{
id: 'gemini-embedding-exp-03-07',
provider: 'google',
cost: {
input_per_million: 0,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['embedding'],
input_token_limit: 8191,
},
embedding: true,
dim: 768,
class: 'embedding',
description: "Google's experimental embedding model optimized for semantic similarity",
},
{
id: 'meta-llama/llama-4-maverick',
provider: 'openrouter',
cost: {
input_per_million: 0.18,
output_per_million: 0.6,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 78,
scores: {
monologue: 72,
code: 64,
reasoning: 56,
},
description: 'Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total).',
},
{
id: 'meta-llama/llama-4-scout',
provider: 'openrouter',
cost: {
input_per_million: 0.08,
output_per_million: 0.3,
},
features: {
context_length: 327680,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 65,
description: 'Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B.',
},
{
id: 'qwen/qwen3-235b-a22b',
provider: 'openrouter',
cost: {
input_per_million: 0.1,
output_per_million: 0.1,
},
features: {
context_length: 40960,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 83,
scores: {
monologue: 73,
code: 62,
reasoning: 57,
},
description: 'Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass.',
},
{
id: 'qwen/qwen-max',
provider: 'openrouter',
cost: {
input_per_million: 1.6,
output_per_million: 6.4,
},
features: {
context_length: 131072,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 80,
scores: {
monologue: 73,
code: 61,
reasoning: 57,
},
description: 'Qwen-Max, based on Qwen2.5, provides the best inference performance among Qwen models, especially for complex multi-step tasks.',
},
{
id: 'mistral/ministral-8b',
provider: 'openrouter',
cost: {
input_per_million: 0.1,
output_per_million: 0.1,
},
features: {
context_length: 131072,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 55,
description: 'Ministral 8B is a state-of-the-art language model optimized for on-device and edge computing. Designed for efficiency in knowledge-intensive tasks, commonsense reasoning, and function-calling.',
},
{
id: 'grok-3',
aliases: ['grok-3-2025-02-11'],
provider: 'xai',
cost: {
input_per_million: 3.0,
output_per_million: 15.0,
},
features: {
context_length: 131_072,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 78,
scores: {
monologue: 80,
code: 70,
reasoning: 65,
},
description: 'Flagship Grok-3 model for complex reasoning and generation',
},
{
id: 'grok-3-fast',
aliases: ['grok-3-fast-2025-04-11'],
provider: 'xai',
cost: {
input_per_million: 5.0,
output_per_million: 25.0,
},
features: {
context_length: 131_072,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 78,
scores: {
monologue: 80,
code: 70,
reasoning: 65,
},
description: 'Same Grok-3 weights on premium infra for lower latency',
},
{
id: 'grok-3-mini',
aliases: ['grok-3-mini-2025-04-11'],
provider: 'xai',
cost: {
input_per_million: 0.3,
output_per_million: 0.5,
},
features: {
context_length: 131_072,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 60,
scores: {
monologue: 62,
code: 55,
reasoning: 50,
},
description: 'Lightweight Grok-3 Mini—budget model for logic tasks',
},
{
id: 'grok-3-mini-fast',
aliases: ['grok-3-mini-fast-2025-04-11'],
provider: 'xai',
cost: {
input_per_million: 0.6,
output_per_million: 4.0,
},
features: {
context_length: 131_072,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 60,
scores: {
monologue: 62,
code: 55,
reasoning: 50,
},
description: 'Grok-3 Mini on accelerated hardware for latency-critical use',
},
{
id: 'gpt-4.1',
aliases: ['gpt-4.1-2025-04-14'],
provider: 'openai',
cost: {
input_per_million: 2.0,
cached_input_per_million: 0.5,
output_per_million: 8.0,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 82,
scores: {
monologue: 86,
code: 83,
reasoning: 71,
},
description: 'Flagship GPT model for complex tasks',
},
{
id: 'gpt-4.1-mini',
aliases: ['gpt-4.1-mini-2025-04-14'],
provider: 'openai',
cost: {
input_per_million: 0.4,
cached_input_per_million: 0.1,
output_per_million: 1.6,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 75,
description: 'Balanced for intelligence, speed, and cost',
},
{
id: 'gpt-4.1-nano',
aliases: ['gpt-4.1-nano-2025-04-14'],
provider: 'openai',
cost: {
input_per_million: 0.1,
cached_input_per_million: 0.025,
output_per_million: 0.4,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 60,
description: 'Fastest, most cost-effective GPT-4.1 model',
},
{
id: 'gpt-4.5-preview',
aliases: ['gpt-4.5-preview-2025-02-27'],
provider: 'openai',
cost: {
input_per_million: 75.0,
cached_input_per_million: 37.5,
output_per_million: 150.0,
},
features: {
context_length: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
description: 'Latest premium GPT model from OpenAI',
},
{
id: 'gpt-4o',
aliases: ['gpt-4o-2024-08-06'],
provider: 'openai',
cost: {
input_per_million: 2.5,
cached_input_per_million: 1.25,
output_per_million: 10.0,
},
features: {
context_length: 128000,
input_modality: ['text', 'image', 'audio'],
output_modality: ['text', 'audio'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 80,
description: 'OpenAI standard model with multimodal capabilities',
},
{
id: 'gpt-4o-mini',
aliases: ['gpt-4o-mini-2024-07-18'],
provider: 'openai',
cost: {
input_per_million: 0.15,
cached_input_per_million: 0.075,
output_per_million: 0.6,
},
features: {
context_length: 128000,
input_modality: ['text', 'image', 'audio'],
output_modality: ['text', 'audio'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 65,
scores: {
monologue: 70,
code: 63,
reasoning: 60,
},
description: 'Smaller, faster version of GPT-4o',
},
{
id: 'o4-mini',
aliases: ['o4-mini-2025-04-16', 'o4-mini-low', 'o4-mini-medium', 'o4-mini-high'],
provider: 'openai',
cost: {
input_per_million: 1.1,
cached_input_per_million: 0.275,
output_per_million: 4.4,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 80,
scores: {
monologue: 85,
code: 82,
reasoning: 76,
},
description: 'Faster, more affordable reasoning model',
},
{
id: 'o3',
aliases: ['o3-2025-04-16'],
provider: 'openai',
cost: {
input_per_million: 2,
cached_input_per_million: 0.5,
output_per_million: 8,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 85,
scores: {
monologue: 87,
code: 84,
reasoning: 79,
},
description: 'Powerful reasoning model',
},
{
id: 'o3-pro',
aliases: ['o3-pro-2025-06-10'],
provider: 'openai',
cost: {
input_per_million: 20,
output_per_million: 80,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
json_output: true,
},
class: 'reasoning',
score: 85,
scores: {
monologue: 87,
code: 84,
reasoning: 79,
},
description: 'Most powerful reasoning model',
},
{
id: 'o1',
aliases: ['o1-2024-12-17'],
provider: 'openai',
cost: {
input_per_million: 15.0,
cached_input_per_million: 7.5,
output_per_million: 60.0,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
description: 'Advanced reasoning model from OpenAI',
},
{
id: 'o1-pro',
aliases: ['o1-pro-2025-03-19'],
provider: 'openai',
cost: {
input_per_million: 150.0,
output_per_million: 600.0,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: false,
json_output: true,
},
class: 'reasoning',
score: 90,
description: 'Premium O-series model from OpenAI, highest reasoning capability',
},
{
id: 'o4-mini',
aliases: ['o4-mini-2025-01-31', 'o1-mini', 'o1-mini-2024-09-12'],
provider: 'openai',
cost: {
input_per_million: 1.1,
cached_input_per_million: 0.55,
output_per_million: 4.4,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 70,
description: 'Smaller O-series model with reasoning capabilities',
},
{
id: 'computer-use-preview',
aliases: ['computer-use-preview-2025-03-11'],
provider: 'openai',
cost: {
input_per_million: 3.0,
output_per_million: 12.0,
},
features: {
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'vision',
description: 'Model that can understand and control computer interfaces',
},
{
id: 'claude-3-7-sonnet-latest',
aliases: ['claude-3-7-sonnet'],
provider: 'anthropic',
cost: {
input_per_million: 3.0,
output_per_million: 15.0,
cached_input_per_million: 0.3,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 64000,
},
class: 'reasoning',
score: 85,
scores: {
monologue: 83,
code: 77,
reasoning: 69,
},
description: 'Latest Claude model with strong reasoning capabilities (extended thinking internal)',
},
{
id: 'claude-3-5-haiku-latest',
aliases: ['claude-3-5-haiku'],
provider: 'anthropic',
cost: {
input_per_million: 0.8,
output_per_million: 4.0,
cached_input_per_million: 0.08,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 8192,
},
class: 'mini',
score: 70,
scores: {
monologue: 66,
code: 63,
reasoning: 55,
},
description: 'Fast, cost-effective Claude model',
},
{
id: 'claude-cli',
provider: 'anthropic',
cost: {
input_per_million: 3.0,
output_per_million: 15.0,
cached_input_per_million: 0.3,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
description: 'Claude accessed via CLI (likely uses latest Sonnet or Haiku model)',
},
{
id: 'claude-opus-4-20250514',
aliases: ['claude-opus-4', 'claude-4-opus'],
provider: 'anthropic',
cost: {
input_per_million: 15.0,
output_per_million: 75.0,
cached_input_per_million: 1.5,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 32000,
reasoning_output: true,
},
class: 'reasoning',
score: 95,
description: 'Claude Opus 4 - Highest level of intelligence and capability with extended thinking',
},
{
id: 'claude-sonnet-4-20250514',
aliases: ['claude-sonnet-4', 'claude-4-sonnet'],
provider: 'anthropic',
cost: {
input_per_million: 3.0,
output_per_million: 15.0,
cached_input_per_million: 0.3,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 64000,
reasoning_output: true,
},
class: 'reasoning',
score: 90,
description: 'Claude Sonnet 4 - High intelligence and balanced performance with extended thinking',
},
{
id: 'gemini-2.5-pro-preview-06-05',
aliases: ['gemini-2.5-pro', 'gemini-2.5-pro-exp-03-25', 'gemini-2.5-pro-preview-05-06'],
provider: 'google',
cost: {
input_per_million: {
threshold_tokens: 200000,
price_below_threshold_per_million: 1.25,
price_above_threshold_per_million: 2.5,
},
output_per_million: {
threshold_tokens: 200000,
price_below_threshold_per_million: 10.0,
price_above_threshold_per_million: 15.0,
},
},
features: {
context_length: 1048576,
input_modality: ['text', 'image', 'video', 'audio'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 65536,
},
class: 'reasoning',
score: 80,
description: 'Paid preview of Gemini 2.5 Pro. State-of-the-art multipurpose model.',
},
{
id: 'gemini-2.5-flash-preview-05-20',
aliases: ['gemini-2.5-flash', 'gemini-2.5-flash-preview-04-17'],
provider: 'google',
cost: {
input_per_million: 0.3,
output_per_million: 2.5,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image', 'video', 'audio'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 65536,
},
class: 'reasoning',
score: 75,
scores: {
monologue: 12,
code: 63,
reasoning: 78,
},
description: 'Balanced multimodal model with large context, built for Agents.',
},
{
id: 'gemini-2.5-flash-lite-preview-06-17',
aliases: ['gemini-2.5-flash-lite'],
provider: 'google',
cost: {
input_per_million: 0.1,
output_per_million: 0.4,
},
features: {
context_length: 1000000,
input_modality: ['text', 'image', 'video', 'audio'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 64000,
},
class: 'reasoning_mini',
score: 75,
scores: {
monologue: 12,
code: 63,
reasoning: 78,
},
description: 'Balanced multimodal model with large context, built for Agents.',
},
{
id: 'gemini-2.0-flash-lite',
provider: 'google',
cost: {
input_per_million: 0.075,
output_per_million: 0.3,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image', 'video', 'audio'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 8192,
},
class: 'standard',
score: 75,
scores: {
monologue: 70,
code: 55,
reasoning: 56,
},
description: 'Lite multimodal model with large context, built for Agents.',
},
{
id: 'gemini-2.0-flash',
provider: 'google',
cost: {
input_per_million: 0.1,
output_per_million: 0.4,
cached_input_per_million: 0.025,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image', 'video', 'audio'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 8192,
},
class: 'standard',
score: 75,
scores: {
monologue: 70,
code: 55,
reasoning: 56,
},
description: 'Balanced multimodal model with large context, built for Agents.',
},
{
id: 'gpt-image-1',
provider: 'openai',
cost: {
per_image: 0.042,
},
features: {
input_modality: ['text', 'image'],
output_modality: ['image'],
streaming: false,
},
class: 'image_generation',
description: "OpenAI's GPT-Image-1 model for text-to-image generation. Supports quality levels (low: $0.011-0.016, medium: $0.042-0.063, high: $0.167-0.25) and sizes (1024x1024, 1024x1536, 1536x1024).",
},
{
id: 'gpt-4o-mini-tts',
provider: 'openai',
cost: {
input_per_million: 0.6,
output_per_million: 12.0,
},
features: {
input_modality: ['text'],
output_modality: ['audio'],
streaming: true,
},
class: 'voice',
description: "OpenAI's advanced text-to-speech model with natural-sounding output. Supports customizable tone, style, and emotion through instructions. 85% cheaper than ElevenLabs with estimated $0.015/minute of audio.",
},
{
id: 'tts-1',
provider: 'openai',
cost: {
input_per_million: 15.0,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['audio'],
streaming: true,
},
class: 'voice',
description: "OpenAI's standard text-to-speech model, optimized for real-time use. Supports 6 voices and multiple audio formats.",
},
{
id: 'tts-1-hd',
provider: 'openai',
cost: {
input_per_million: 30.0,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['audio'],
streaming: true,
},
class: 'voice',
description: "OpenAI's high-definition text-to-speech model for superior audio quality. Supports 6 voices and multiple audio formats.",
},
{
id: 'eleven_multilingual_v2',
provider: 'elevenlabs',
cost: {
input_per_million: 55,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['audio'],
streaming: true,
},
class: 'voice',
description: "ElevenLabs' multilingual high quality text-to-speech model supporting 29 languages with natural voice capabilities.",
},
{
id: 'eleven_turbo_v2_5',
provider: 'elevenlabs',
cost: {
input_per_million: 27.5,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['audio'],
streaming: true,
},
class: 'voice',
description: "ElevenLabs' turbo model optimized for low-latency text-to-speech with high quality output.",
},
{
id: 'eleven_flash_v2_5',
provider: 'elevenlabs',
cost: {
input_per_million: 27.5,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['audio'],
streaming: true,
},
class: 'voice',
description: "ElevenLabs' fastest model optimized for ultra low-latency text-to-speech.",
},
{
id: 'gemini-2.5-flash-preview-tts',
provider: 'google',
cost: {
input_per_million: 10.0,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['audio'],
streaming: true,
context_length: 32000,
},
class: 'voice',
description: "Gemini's fast text-to-speech model with support for 24 languages and 30 distinct voices. Optimized for low-latency applications.",
},
{
id: 'gemini-2.5-pro-preview-tts',
provider: 'google',
cost: {
input_per_million: 20.0,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['audio'],
streaming: true,
context_length: 32000,
},
class: 'voice',
description: "Gemini's advanced text-to-speech model with superior voice quality, expression control, and multi-speaker support for creating dynamic conversations.",
},
{
id: 'codex-mini-latest',
provider: 'openai',
cost: {
input_per_million: 1.5,
cached_input_per_million: 0.375,
output_per_million: 6.0,
},
features: {
context_length: 200000,
max_output_tokens: 100000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: false,
streaming: true,
json_output: true,
reasoning_output: true,
},
class: 'code',
description: 'Fine-tuned o4-mini model for Codex CLI with reasoning token support',
},
{
id: 'perplexity/sonar',
provider: 'openrouter',
cost: {
input_per_million: 1.0,
output_per_million: 1.0,
},
features: {
context_length: 32768,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
description: 'Lightweight, cost-effective search model designed for quick, grounded answers.',
},
{
id: 'perplexity/sonar-pro',
provider: 'openrouter',
cost: {
input_per_million: 3.0,
output_per_million: 15.0,
},
features: {
context_length: 32768,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
description: 'Advanced search model optimized for complex queries and deeper content understanding.',
},
{
id: 'perplexity/sonar-reasoning',
provider: 'openrouter',
cost: {
input_per_million: 1.0,
output_per_million: 5.0,
},
features: {
context_length: 32768,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning_mini',
description: 'Quick problem-solving and reasoning model, ideal for evaluating complex queries.',
},
{
id: 'perplexity/sonar-reasoning-pro',
provider: 'openrouter',
cost: {
input_per_million: 2.0,
output_per_million: 8.0,
},
features: {
context_length: 32768,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
description: 'Enhanced reasoning model with multi-step problem-solving capabilities and real-time search.',
},
{
id: 'perplexity/sonar-deep-research',
provider: 'openrouter',
cost: {
input_per_million: 2.0,
output_per_million: 8.0,
},
features: {
context_length: 32768,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
description: 'Best suited for exhaustive research, generating detailed reports and in-depth insights.',
},
{
id: 'mistralai/magistral-small-2506',
provider: 'openrouter',
cost: {
input_per_million: 0.5,
output_per_million: 1.5,
},
features: {
context_length: 40000,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning_mini',
score: 72,
description: 'Magistral Small is a 24B parameter instruction-tuned model based on Mistral-Small-3.1 (2503), enhanced through supervised fine-tuning on traces from Magistral Medium and further refined via reinforcement learning. It is optimized for reasoning and supports a wide multilingual range, including over 20 languages.',
},
{
id: 'mistralai/magistral-medium-2506:thinking',
provider: 'openrouter',
cost: {
input_per_million: 2.0,
output_per_million: 5.0,
},
features: {
context_length: 40960,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
reasoning_output: true,
},
class: 'reasoning',
score: 80,
description: "Magistral is Mistral's first reasoning model. It is ideal for general purpose use requiring longer thought processing and better accuracy than with non-reasoning LLMs. From legal research and financial forecasting to software development and creative storytelling — this model solves multi-step challenges where transparency and precision are critical.",
},
{
id: 'test-model',
provider: 'test',
cost: {
input_per_million: 0,
output_per_million: 0,
},
features: {
context_length: 8192,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
scores: {
monologue: 50,
code: 50,
reasoning: 50,
},
description: 'Test model for unit testing purposes',
},
{
id: 'dall-e-3',
provider: 'openai',
cost: {
per_image: 0.04,
},
features: {
input_modality: ['text'],
output_modality: ['image'],
},
class: 'image_generation',
description: "OpenAI's DALL-E 3 model for high-quality image generation",
},
{
id: 'dall-e-2',
provider: 'openai',
cost: {
per_image: 0.02,
},
features: {
input_modality: ['text', 'image'],
output_modality: ['image'],
},
class: 'image_generation',
description: "OpenAI's DALL-E 2 model, supports image editing and variations",
},
{
id: 'imagen-3.0-generate-002',
aliases: ['imagen-3'],
provider: 'google',
cost: {
per_image: 0.04,
},
features: {
input_modality: ['text'],
output_modality: ['image'],
},
class: 'image_generation',
description: "Google's Imagen 3 model for high-quality image generation",
},
{
id: 'imagen-2',
provider: 'google',
cost: {
per_image: 0.02,
},
features: {
input_modality: ['text'],
output_modality: ['image'],
},
class: 'image_generation',
description: "Google's Imagen 2 model for image generation",
},
{
id: 'gemini-live-2.5-flash-preview',
provider: 'google',
cost: {
input_per_million: {
text: 0.5,
audio: 3.0,
video: 3.0,
},
output_per_million: {
text: 2.0,
audio: 12.0,
},
},
features: {
context_length: 32000,
input_modality: ['text', 'audio', 'video'],
output_modality: ['text', 'audio'],
streaming: true,
},
class: 'transcription',
description: 'Gemini Live API for real-time multimodal interaction with modality-specific pricing',
},
{
id: 'gemini-2.0-flash-live-001',
provider: 'google',
cost: {
input_per_million: {
text: 0.35,
audio: 2.1,
video: 2.1,
},
output_per_million: {
text: 1.5,
audio: 8.5,
},
},
features: {
context_length: 32000,
input_modality: ['text', 'audio', 'video'],
output_modality: ['text', 'audio'],
streaming: true,
},
class: 'transcription',
description: 'Gemini 2.0 Flash Live API for real-time multimodal interaction',
},
{
id: 'gpt-4o-transcribe',
provider: 'openai',
cost: {
input_per_million: {
audio: 6.0,
},
output_per_million: {
text: 0,
},
},
features: {
context_length: 128000,
input_modality: ['audio'],
output_modality: ['text'],
streaming: true,
},
class: 'transcription',
description: 'GPT-4o transcription with incremental streaming output',
},
{
id: 'gpt-4o-mini-transcribe',
provider: 'openai',
cost: {
input_per_million: {
audio: 6.0,
},
output_per_million: {
text: 0,
},
},
features: {
context_length: 128000,
input_modality: ['audio'],
output_modality: ['text'],
streaming: true,
},
class: 'transcription',
description: 'GPT-4o Mini transcription with incremental streaming output',
},
{
id: 'whisper-1',
provider: 'openai',
cost: {
input_per_million: {
audio: 6.0,
},
output_per_million: {
text: 0,
},
},
features: {
context_length: 25600,
input_modality: ['audio'],
output_modality: ['text'],
streaming: true,
},
class: 'transcription',
description: 'OpenAI Whisper transcription with full-turn output',
},
{
id: 'deepseek-chat',
aliases: ['deepseek-v3-0324'],
provider: 'deepseek',
cost: {
input_per_million: {
peak_utc_start_hour: 0,
peak_utc_start_minute: 30,
peak_utc_end_hour: 16,
peak_utc_end_minute: 30,
peak_price_per_million: 0.27,
off_peak_price_per_million: 0.135,
},
cached_input_per_million: {
peak_utc_start_hour: 0,
peak_utc_start_minute: 30,
peak_utc_end_hour: 16,
peak_utc_end_minute: 30,
peak_price_per_million: 0.07,
off_peak_price_per_million: 0.035,
},
output_per_million: {
peak_utc_start_hour: 0,
peak_utc_start_minute: 30,
peak_utc_end_hour: 16,
peak_utc_end_minute: 30,
peak_price_per_million: 1.1,
off_peak_price_per_million: 0.55,
},
},
features: {
context_length: 64000,
max_output_tokens: 8192,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 75,
description: 'DeepSeek-V3 chat model with FIM completion support and time-based pricing',
},
{
id: 'deepseek-reasoner',
aliases: ['deepseek-r1-0528'],
provider: 'deepseek',
cost: {
input_per_million: {
peak_utc_start_hour: 0,
peak_utc_start_minute: 30,
peak_utc_end_hour: 16,
peak_utc_end_minute: 30,
peak_price_per_million: 0.55,
off_peak_price_per_million: 0.1375,
},
cached_input_per_million: {
peak_utc_start_hour: 0,
peak_utc_start_minute: 30,
peak_utc_end_hour: 16,
peak_utc_end_minute: 30,
peak_price_per_million: 0.14,
off_peak_price_per_million: 0.035,
},
output_per_million: {
peak_utc_start_hour: 0,
peak_utc_start_minute: 30,
peak_utc_end_hour: 16,
peak_utc_end_minute: 30,
peak_price_per_million: 2.19,
off_peak_price_per_million: 0.5475,
},
},
features: {
context_length: 64000,
max_output_tokens: 64000,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
simulate_tools: true,
streaming: true,
json_output: true,
reasoning_output: true,
},
class: 'reasoning',
score: 85,
description: 'DeepSeek-R1 advanced reasoning model with extended output and time-based pricing',
},
];
/**
 * Resolve a model id to its registry entry.
 *
 * Resolution order:
 *   1. externally registered models (runtime overrides),
 *   2. direct id match in MODEL_REGISTRY,
 *   3. alias match in MODEL_REGISTRY,
 *   4. reasoning-effort suffix ("-low"/"-medium"/"-high"/"-max") stripped
 *      from the id, then resolved recursively (e.g. "o3-high" -> "o3").
 *
 * @param {string} modelId - Model id, alias, or suffixed variant.
 * @returns {object|undefined} The matching model entry, or undefined.
 */
function findModel(modelId) {
    // Runtime-registered external models take precedence over the registry.
    const external = (0, external_models_js_1.getExternalModel)(modelId);
    if (external)
        return external;
    for (const entry of exports.MODEL_REGISTRY) {
        if (entry.id === modelId)
            return entry;
    }
    for (const entry of exports.MODEL_REGISTRY) {
        if (entry.aliases?.includes(modelId))
            return entry;
    }
    // Strip a trailing effort suffix and retry; the suffixes are mutually
    // exclusive endings, so at most one can match.
    const effortSuffix = ['-low', '-medium', '-high', '-max'].find(s => modelId.endsWith(s));
    if (effortSuffix)
        return findModel(modelId.slice(0, -effortSuffix.length));
    return undefined;
}
//# sourceMappingURL=model_data.js.map