/**
 * @just-every/ensemble
 * LLM provider abstraction layer with unified streaming interface.
 *
 * This module defines the model class pools (MODEL_CLASSES) and the
 * per-model metadata registry (MODEL_REGISTRY): provider, pricing,
 * modalities, context limits, and quality scores.
 */
import { getExternalModel } from '../utils/external_models.js';
// Task-class → candidate model pools used for class-based model selection.
// Each class lists interchangeable model ids for that kind of task; `random:
// true` presumably means a model is picked at random from the pool rather
// than in listed priority order — confirm against the selector consuming
// this table. NOTE(review): ids here should match an `id` or alias in
// MODEL_REGISTRY below; verify when adding or renaming entries.
export const MODEL_CLASSES = {
  // Balanced general-purpose chat/completion models.
  standard: {
    models: [
      'gpt-5.2-chat-latest',
      'gemini-3-flash-preview',
      'claude-sonnet-4-5-20250929',
      'grok-4',
    ],
    random: true,
  },
  // Small, low-cost models for high-volume or low-stakes calls.
  mini: {
    models: [
      'gpt-5-nano',
      'gemini-2.5-flash-lite',
      'claude-haiku-4-5-20251001',
      'grok-3-mini',
    ],
    random: true,
  },
  // Models suited to multi-step reasoning tasks.
  reasoning: {
    models: [
      'gpt-5.2',
      'gemini-2.5-pro',
      'claude-sonnet-4-5-20250929',
      'grok-4-1-fast-reasoning',
    ],
    random: true,
  },
  // Highest-capability (and most expensive) reasoning tier.
  reasoning_high: {
    models: [
      'gpt-5.2-pro',
      'gemini-3.1-pro-preview',
      'claude-opus-4-5-20251101',
      'grok-4',
    ],
    random: true,
  },
  // Cheaper reasoning-capable models.
  reasoning_mini: {
    models: [
      'gpt-5-mini',
      'gemini-3-flash-preview',
      'claude-sonnet-4-5-20250929',
      'grok-3-mini',
    ],
    random: true,
  },
  // Pool used for internal-monologue style generation.
  monologue: {
    models: [
      'gpt-5.2-chat-latest',
      'gemini-3.1-pro-preview',
      'claude-sonnet-4-5-20250929',
      'grok-4',
    ],
    random: true,
  },
  // Pool used for metacognition (self-review / strategy) passes.
  metacognition: {
    models: [
      'gpt-5.2',
      'gemini-3.1-pro-preview',
      'claude-opus-4-5-20251101',
      'grok-4',
    ],
    random: true,
  },
  // Code generation and editing.
  code: {
    models: [
      'gpt-5.1-codex-max',
      'gemini-3.1-pro-preview',
      'claude-opus-4-5-20251101',
      'grok-4',
      'qwen3-coder',
    ],
    random: true,
  },
  // Long-form prose / writing tasks.
  writing: {
    models: [
      'gpt-5.2-chat-latest',
      'gemini-3-flash-preview',
      'claude-sonnet-4-5-20250929',
      'grok-4',
    ],
    random: true,
  },
  // Cheap summarization pool (same models as `mini`).
  summary: {
    models: [
      'gpt-5-nano',
      'gemini-2.5-flash-lite',
      'claude-haiku-4-5-20251001',
      'grok-3-mini',
    ],
    random: true,
  },
  // Image-understanding (vision input) models.
  vision: {
    models: [
      'gpt-5.2',
      'gemini-3.1-pro-preview',
      'claude-opus-4-5-20251101',
      'grok-4',
    ],
    random: true,
  },
  // Cheaper vision-capable models.
  vision_mini: {
    models: [
      'gpt-5-mini',
      'gemini-3-flash-preview',
      'claude-haiku-4-5-20251001',
      'grok-3-mini',
    ],
    random: true,
  },
  // Deep-research / web-search oriented models.
  search: {
    models: [
      'o3-deep-research',
      'perplexity/sonar-deep-research',
    ],
    random: true,
  },
  long: {
    models: [
      'gpt-5.2',
      'gemini-3.1-pro-preview',
      'claude-opus-4-5-20251101',
      'grok-4',
    ],
    random: true,
    description: 'Models with very large context windows for processing long documents',
  },
  // Text-to-image backends. No `random` flag here — listed order is
  // presumably meaningful to the selector; confirm before reordering.
  image_generation: {
    models: [
      'gpt-image-1.5',
      'gemini-3-pro-image-preview',
      'seedream-4',
      'luma-photon-1',
      'ideogram-3.0',
      'midjourney-v7',
      'flux-kontext-pro',
      'stability-ultra',
      'runway-gen4-image',
      'recraft-v3',
    ],
  },
  embedding: {
    models: [
      'text-embedding-3-large',
      'text-embedding-004',
    ],
    description: 'Vector embedding models for semantic search and RAG',
  },
  voice: {
    models: [
      'tts-1-hd',
      'eleven_multilingual_v2',
      'gemini-2.5-pro-preview-tts',
    ],
    description: 'Text-to-Speech models for voice generation',
  },
  transcription: {
    models: [
      'gpt-4o-transcribe',
      'gemini-2.5-flash-native-audio-preview-12-2025',
    ],
    description: 'Speech-to-Text models for audio transcription with real-time streaming',
  },
};
export const MODEL_REGISTRY = [
{
id: 'seedream-4',
aliases: ['seedream-4.0', 'bytedance/seedream-4', 'byteplus/seedream-4'],
provider: 'bytedance',
cost: { per_image: 0.03 },
features: { input_modality: ['text', 'image'], output_modality: ['image'] },
class: 'image_generation',
description: 'Seedream 4.0 text-to-image via BytePlus ModelArk (OpenAI-compatible Images API).',
},
{
id: 'flux-kontext-pro',
provider: 'fireworks',
cost: { per_image: 0.04 },
features: { input_modality: ['text', 'image'], output_modality: ['image'] },
class: 'image_generation',
description: 'FLUX.1 Kontext Pro via Fireworks (async workflow with polling).',
},
{
id: 'stability-ultra',
aliases: ['stability-ultra-1', 'stable-image-ultra'],
provider: 'stability',
cost: { per_image: 0.08 },
features: { input_modality: ['text', 'image'], output_modality: ['image'] },
class: 'image_generation',
description: 'Stable Image Ultra (v2beta) – photorealistic, 1MP default.',
},
{
id: 'runway-gen4-image',
provider: 'runway',
cost: { per_image: 0.08 },
features: { input_modality: ['text', 'image'], output_modality: ['image'] },
class: 'image_generation',
description: 'Runway Gen‑4 Image via official Runway API.',
},
{
id: 'runway-gen4-image-turbo',
provider: 'runway',
cost: { per_image: 0.02 },
features: { input_modality: ['text', 'image'], output_modality: ['image'] },
class: 'image_generation',
description: 'Runway Gen‑4 Image Turbo via official Runway API.',
},
{
id: 'flux-pro-1.1',
provider: 'fireworks',
cost: { per_image: 0.04 },
features: { input_modality: ['text'], output_modality: ['image'] },
class: 'image_generation',
description: 'FLUX Pro 1.1 (fast, high quality). Uses Fireworks or FAL fallback.',
},
{
id: 'flux-schnell',
provider: 'fireworks',
cost: { per_image: 0.02 },
features: { input_modality: ['text'], output_modality: ['image'] },
class: 'image_generation',
description: 'FLUX Schnell (very fast). Uses Fireworks or FAL fallback.',
},
{
id: 'sd3.5-large',
provider: 'stability',
cost: { per_image: 0.08 },
features: { input_modality: ['text', 'image'], output_modality: ['image'] },
class: 'image_generation',
description: 'Stability SD3.5 Large.',
},
{
id: 'sd3.5-large-turbo',
provider: 'stability',
cost: { per_image: 0.10 },
features: { input_modality: ['text', 'image'], output_modality: ['image'] },
class: 'image_generation',
description: 'Stability SD3.5 Large Turbo.',
},
{
id: 'sd3.5-medium',
provider: 'stability',
cost: { per_image: 0.05 },
features: { input_modality: ['text', 'image'], output_modality: ['image'] },
class: 'image_generation',
description: 'Stability SD3.5 Medium.',
},
{
id: 'sd3.5-flash',
provider: 'stability',
cost: { per_image: 0.02 },
features: { input_modality: ['text', 'image'], output_modality: ['image'] },
class: 'image_generation',
description: 'Stability SD3.5 Flash (fast).',
},
{
id: 'recraft-v3',
provider: 'fal',
cost: { per_image: 0.04 },
features: { input_modality: ['text'], output_modality: ['image'] },
class: 'image_generation',
description: 'Recraft V3 via FAL.ai (text‑to‑image / vector styles).',
},
{
id: 'text-embedding-3-small',
provider: 'openai',
cost: {
input_per_million: 0.02,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['embedding'],
input_token_limit: 8191,
},
embedding: true,
dim: 1536,
class: 'embedding',
description: "OpenAI's small embedding model, good balance of performance and cost",
},
{
id: 'text-embedding-3-large',
provider: 'openai',
cost: {
input_per_million: 0.13,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['embedding'],
input_token_limit: 8191,
},
embedding: true,
dim: 3072,
class: 'embedding',
description: "OpenAI's large embedding model, good balance of performance and cost",
},
{
id: 'gemini-embedding-exp-03-07',
provider: 'google',
cost: {
input_per_million: 0,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['embedding'],
input_token_limit: 8191,
},
embedding: true,
dim: 768,
class: 'embedding',
description: "Google's experimental embedding model optimized for semantic similarity",
},
{
id: 'text-embedding-004',
provider: 'google',
cost: {
input_per_million: 0,
output_per_million: 0,
},
features: {
input_modality: ['text'],
output_modality: ['embedding'],
input_token_limit: 8191,
},
embedding: true,
dim: 768,
class: 'embedding',
description: "Google's stable text embedding model (text-embedding-004)",
},
{
id: 'meta-llama/llama-4-maverick',
provider: 'openrouter',
cost: {
input_per_million: 0.18,
output_per_million: 0.6,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 78,
scores: {
monologue: 72,
code: 64,
reasoning: 56,
},
description: 'Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total).',
},
{
id: 'meta-llama/llama-4-scout',
provider: 'openrouter',
cost: {
input_per_million: 0.08,
output_per_million: 0.3,
},
features: {
context_length: 327680,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 65,
description: 'Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B.',
},
{
id: 'qwen/qwen3-235b-a22b',
provider: 'openrouter',
cost: {
input_per_million: 0.1,
output_per_million: 0.1,
},
features: {
context_length: 40960,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 83,
scores: {
monologue: 73,
code: 62,
reasoning: 57,
},
description: 'Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass.',
},
{
id: 'qwen/qwen-max',
provider: 'openrouter',
cost: {
input_per_million: 1.6,
output_per_million: 6.4,
},
features: {
context_length: 131072,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 80,
scores: {
monologue: 73,
code: 61,
reasoning: 57,
},
description: 'Qwen-Max, based on Qwen2.5, provides the best inference performance among Qwen models, especially for complex multi-step tasks.',
},
{
id: 'mistral/ministral-8b',
provider: 'openrouter',
cost: {
input_per_million: 0.1,
output_per_million: 0.1,
},
features: {
context_length: 131072,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 55,
description: 'Ministral 8B is a state-of-the-art language model optimized for on-device and edge computing. Designed for efficiency in knowledge-intensive tasks, commonsense reasoning, and function-calling.',
},
{
id: 'grok-4-1-fast-reasoning',
aliases: ['grok-4.1-fast-reasoning'],
provider: 'xai',
cost: {
input_per_million: {
threshold_tokens: 128_000,
price_below_threshold_per_million: 0.2,
price_above_threshold_per_million: 0.5,
},
output_per_million: {
threshold_tokens: 128_000,
price_below_threshold_per_million: 0.5,
price_above_threshold_per_million: 1.0,
},
cached_input_per_million: 0.05,
},
features: {
context_length: 2_000_000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 88,
scores: {
monologue: 90,
code: 85,
reasoning: 88,
},
description: 'Grok 4.1 Fast with extended reasoning. 2M context, tiered pricing at 128k threshold.',
},
{
id: 'grok-4-1-fast-non-reasoning',
aliases: ['grok-4.1-fast-non-reasoning'],
provider: 'xai',
cost: {
input_per_million: {
threshold_tokens: 128_000,
price_below_threshold_per_million: 0.2,
price_above_threshold_per_million: 0.5,
},
output_per_million: {
threshold_tokens: 128_000,
price_below_threshold_per_million: 0.5,
price_above_threshold_per_million: 1.0,
},
cached_input_per_million: 0.05,
},
features: {
context_length: 2_000_000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 85,
scores: {
monologue: 87,
code: 82,
reasoning: 80,
},
description: 'Grok 4.1 Fast without reasoning. 2M context, tiered pricing at 128k threshold.',
},
{
id: 'grok-4',
aliases: ['grok-4-2025-09-01'],
provider: 'xai',
cost: {
input_per_million: 3.0,
output_per_million: 15.0,
cached_input_per_million: 0.75,
},
features: {
context_length: 256_000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 90,
scores: {
monologue: 92,
code: 88,
reasoning: 85,
},
description: 'Grok-4 premium model with 256k context and vision capabilities.',
},
{
id: 'grok-4-fast-reasoning',
aliases: ['grok-4-fast-reasoning-2025-09-01'],
provider: 'xai',
cost: {
input_per_million: {
threshold_tokens: 128_000,
price_below_threshold_per_million: 0.2,
price_above_threshold_per_million: 0.5,
},
output_per_million: {
threshold_tokens: 128_000,
price_below_threshold_per_million: 0.5,
price_above_threshold_per_million: 1.0,
},
cached_input_per_million: 0.05,
},
features: {
context_length: 2_000_000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 87,
scores: {
monologue: 89,
code: 84,
reasoning: 86,
},
description: 'Grok-4 Fast with reasoning (Sep 2025). 2M context, tiered pricing at 128k threshold.',
},
{
id: 'grok-4-fast-non-reasoning',
aliases: ['grok-4-fast-non-reasoning-2025-09-01'],
provider: 'xai',
cost: {
input_per_million: {
threshold_tokens: 128_000,
price_below_threshold_per_million: 0.2,
price_above_threshold_per_million: 0.5,
},
output_per_million: {
threshold_tokens: 128_000,
price_below_threshold_per_million: 0.5,
price_above_threshold_per_million: 1.0,
},
cached_input_per_million: 0.05,
},
features: {
context_length: 2_000_000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 84,
scores: {
monologue: 86,
code: 81,
reasoning: 79,
},
description: 'Grok-4 Fast without reasoning (Sep 2025). 2M context, tiered pricing at 128k threshold.',
},
{
id: 'grok-3',
aliases: ['grok-3-2025-02-11'],
provider: 'xai',
cost: {
input_per_million: 3.0,
output_per_million: 15.0,
},
features: {
context_length: 131_072,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 78,
scores: {
monologue: 80,
code: 70,
reasoning: 65,
},
description: 'Grok-3 model with 131k context.',
},
{
id: 'grok-3-mini',
aliases: ['grok-3-mini-2025-04-11'],
provider: 'xai',
cost: {
input_per_million: 0.3,
output_per_million: 0.5,
},
features: {
context_length: 131_072,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 60,
scores: {
monologue: 62,
code: 55,
reasoning: 50,
},
description: 'Grok-3 Mini—budget model with 131k context.',
},
{
id: 'grok-3-mini-accelerated',
aliases: ['grok-3-mini-accelerated-2025-04-11'],
provider: 'xai',
cost: {
input_per_million: 0.6,
output_per_million: 4.0,
},
features: {
context_length: 131_072,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 60,
scores: {
monologue: 62,
code: 55,
reasoning: 50,
},
description: 'Grok-3 Mini on accelerated hardware for lower latency.',
},
{
id: 'grok-2',
aliases: ['grok-2-text'],
provider: 'xai',
cost: {
input_per_million: 2.0,
output_per_million: 10.0,
},
features: {
context_length: 128_000,
input_modality: ['text'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 70,
scores: {
monologue: 72,
code: 65,
reasoning: 60,
},
description: 'Legacy Grok-2 text-only model.',
},
{
id: 'grok-2-vision',
aliases: ['grok-2-vision'],
provider: 'xai',
cost: {
input_per_million: 2.0,
output_per_million: 10.0,
},
features: {
context_length: 128_000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'vision',
score: 70,
scores: {
monologue: 72,
code: 65,
reasoning: 60,
},
description: 'Legacy Grok-2 model with vision capabilities.',
},
{
id: 'gpt-4.1',
aliases: ['gpt-4.1-2025-04-14'],
provider: 'openai',
cost: {
input_per_million: 2.0,
cached_input_per_million: 0.5,
output_per_million: 8.0,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 82,
scores: {
monologue: 86,
code: 83,
reasoning: 71,
},
description: 'Flagship GPT model for complex tasks',
},
{
id: 'gpt-4.1-mini',
aliases: ['gpt-4.1-mini-2025-04-14'],
provider: 'openai',
cost: {
input_per_million: 0.4,
cached_input_per_million: 0.1,
output_per_million: 1.6,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 75,
description: 'Balanced for intelligence, speed, and cost',
},
{
id: 'gpt-4.1-nano',
aliases: ['gpt-4.1-nano-2025-04-14'],
provider: 'openai',
cost: {
input_per_million: 0.1,
cached_input_per_million: 0.025,
output_per_million: 0.4,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 60,
description: 'Fastest, most cost-effective GPT-4.1 model',
},
{
id: 'gpt-4.5-preview',
aliases: ['gpt-4.5-preview-2025-02-27'],
provider: 'openai',
cost: {
input_per_million: 75.0,
cached_input_per_million: 37.5,
output_per_million: 150.0,
},
features: {
context_length: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
description: 'Latest premium GPT model from OpenAI',
},
{
id: 'gpt-5.2',
aliases: ['gpt-5.2-2025-12-11'],
provider: 'openai',
cost: {
input_per_million: 1.75,
cached_input_per_million: 0.175,
output_per_million: 14.0,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 96,
scores: {
monologue: 97,
code: 95,
reasoning: 94,
},
description: 'Frontier flagship model for complex, multi-step tasks (400k/128k).',
},
{
id: 'gpt-5.2-pro',
aliases: ['gpt-5.2-pro-2025-12-11'],
provider: 'openai',
cost: {
input_per_million: 21.0,
output_per_million: 168.0,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 99,
description: 'Highest-accuracy GPT-5.2 variant for the hardest problems (400k/128k).',
},
{
id: 'gpt-5.2-chat-latest',
provider: 'openai',
cost: {
input_per_million: 1.75,
cached_input_per_million: 0.175,
output_per_million: 14.0,
},
features: {
context_length: 128000,
max_output_tokens: 16384,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 94,
description: 'ChatGPT-optimized GPT-5.2 chat model (128k/16k).',
},
{
id: 'gpt-5',
aliases: ['gpt-5-2025-08-07'],
provider: 'openai',
cost: {
input_per_million: 1.25,
cached_input_per_million: 0.125,
output_per_million: 10.0,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 95,
scores: {
monologue: 96,
code: 95,
reasoning: 92,
},
description: 'Frontier flagship model for complex, multi-step tasks (400k/128k).',
},
{
id: 'gpt-5-pro',
aliases: ['gpt-5-pro-2025-10-06'],
provider: 'openai',
cost: {
input_per_million: 15.0,
output_per_million: 120.0,
},
features: {
context_length: 400000,
max_output_tokens: 272000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 98,
description: 'High-accuracy GPT-5 variant for the hardest problems (400k/128k).',
},
{
id: 'gpt-5-chat-latest',
aliases: ['gpt-5-chat'],
provider: 'openai',
cost: {
input_per_million: 1.25,
cached_input_per_million: 0.125,
output_per_million: 10.0,
},
features: {
context_length: 128000,
max_output_tokens: 16384,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 92,
description: 'ChatGPT-optimized GPT-5 chat model (128k/16k).',
},
{
id: 'gpt-5-mini',
aliases: ['gpt-5-mini-2025-08-07'],
provider: 'openai',
cost: {
input_per_million: 0.25,
cached_input_per_million: 0.025,
output_per_million: 2.0,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 88,
scores: {
monologue: 88,
code: 87,
reasoning: 85,
},
description: 'A faster, more cost-efficient version of GPT-5 (400k/128k).',
},
{
id: 'gpt-5-nano',
aliases: ['gpt-5-nano-2025-08-07'],
provider: 'openai',
cost: {
input_per_million: 0.05,
cached_input_per_million: 0.005,
output_per_million: 0.4,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 78,
scores: {
monologue: 78,
code: 76,
reasoning: 75,
},
description: 'Fastest, most cost-efficient GPT-5 model (400k/128k).',
},
{
id: 'gpt-5.1',
aliases: ['gpt-5.1-2025-11-13'],
provider: 'openai',
cost: {
input_per_million: 1.25,
cached_input_per_million: 0.125,
output_per_million: 10.0,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 95,
description: 'Prior-gen GPT-5.1 flagship model (400k/128k).',
},
{
id: 'gpt-5.1-chat-latest',
provider: 'openai',
cost: {
input_per_million: 1.25,
cached_input_per_million: 0.125,
output_per_million: 10.0,
},
features: {
context_length: 128000,
max_output_tokens: 16384,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 91,
description: 'ChatGPT-optimized GPT-5.1 chat model (128k/16k).',
},
{
id: 'gpt-5-codex',
aliases: ['gpt-5-codex-2025-09-15'],
provider: 'openai',
cost: {
input_per_million: 1.25,
cached_input_per_million: 0.125,
output_per_million: 10.0,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'code',
score: 90,
description: 'Prior-gen Codex model for agentic coding (400k/128k).',
},
{
id: 'gpt-5.1-codex',
provider: 'openai',
cost: {
input_per_million: 1.25,
cached_input_per_million: 0.125,
output_per_million: 10.0,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'code',
score: 92,
description: 'GPT-5.1 Codex — optimized for agentic coding tasks (400k/128k).',
},
{
id: 'gpt-5.1-codex-mini',
provider: 'openai',
cost: {
input_per_million: 0.25,
cached_input_per_million: 0.025,
output_per_million: 2.0,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'code',
score: 86,
description: 'GPT-5.1 Codex Mini — lightweight coding model with 400k context.',
},
{
id: 'gpt-5.1-codex-max',
provider: 'openai',
cost: {
input_per_million: 1.25,
cached_input_per_million: 0.125,
output_per_million: 10.0,
},
features: {
context_length: 400000,
max_output_tokens: 128000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'code',
score: 95,
description: 'GPT-5.1 Codex Max — most capable Codex model for coding agents (400k/128k).',
},
{
id: 'gpt-4o',
aliases: ['gpt-4o-2024-08-06'],
provider: 'openai',
cost: {
input_per_million: 2.5,
cached_input_per_million: 1.25,
output_per_million: 10.0,
},
features: {
context_length: 128000,
input_modality: ['text', 'image', 'audio'],
output_modality: ['text', 'audio'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'standard',
score: 80,
description: 'OpenAI standard model with multimodal capabilities',
},
{
id: 'gpt-4o-mini',
aliases: ['gpt-4o-mini-2024-07-18'],
provider: 'openai',
cost: {
input_per_million: 0.15,
cached_input_per_million: 0.075,
output_per_million: 0.6,
},
features: {
context_length: 128000,
input_modality: ['text', 'image', 'audio'],
output_modality: ['text', 'audio'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'mini',
score: 65,
scores: {
monologue: 70,
code: 63,
reasoning: 60,
},
description: 'Smaller, faster version of GPT-4o',
},
{
id: 'o4-mini',
aliases: ['o4-mini-2025-04-16', 'o4-mini-low', 'o4-mini-medium', 'o4-mini-high'],
provider: 'openai',
cost: {
input_per_million: 1.1,
cached_input_per_million: 0.275,
output_per_million: 4.4,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 80,
scores: {
monologue: 85,
code: 82,
reasoning: 76,
},
description: 'Faster, more affordable reasoning model',
},
{
id: 'o3',
aliases: ['o3-2025-04-16'],
provider: 'openai',
cost: {
input_per_million: 2,
cached_input_per_million: 0.5,
output_per_million: 8,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 85,
scores: {
monologue: 87,
code: 84,
reasoning: 79,
},
description: 'Powerful reasoning model',
},
{
id: 'o3-pro',
aliases: ['o3-pro-2025-06-10'],
provider: 'openai',
cost: {
input_per_million: 20,
output_per_million: 80,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
json_output: true,
},
class: 'reasoning',
score: 85,
scores: {
monologue: 87,
code: 84,
reasoning: 79,
},
description: 'Most powerful reasoning model',
},
{
id: 'o3-deep-research',
aliases: ['o3-deep-research-2025-06-26'],
provider: 'openai',
cost: {
input_per_million: 10.0,
cached_input_per_million: 2.5,
output_per_million: 40.0,
},
features: {
context_length: 200000,
input_modality: ['text', 'image', 'audio'],
output_modality: ['text', 'image', 'audio'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 90,
scores: {
monologue: 92,
code: 89,
reasoning: 88,
},
description: 'Our most powerful deep research model',
},
{
id: 'o1',
aliases: ['o1-2024-12-17'],
provider: 'openai',
cost: {
input_per_million: 15.0,
cached_input_per_million: 7.5,
output_per_million: 60.0,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
description: 'Advanced reasoning model from OpenAI',
},
{
id: 'o1-pro',
aliases: ['o1-pro-2025-03-19'],
provider: 'openai',
cost: {
input_per_million: 150.0,
output_per_million: 600.0,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: false,
json_output: true,
},
class: 'reasoning',
score: 90,
description: 'Premium O-series model from OpenAI, highest reasoning capability',
},
{
id: 'o4-mini',
aliases: ['o4-mini-2025-01-31', 'o1-mini', 'o1-mini-2024-09-12'],
provider: 'openai',
cost: {
input_per_million: 1.1,
cached_input_per_million: 0.55,
output_per_million: 4.4,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
score: 70,
description: 'Smaller O-series model with reasoning capabilities',
},
{
id: 'computer-use-preview',
aliases: ['computer-use-preview-2025-03-11'],
provider: 'openai',
cost: {
input_per_million: 3.0,
output_per_million: 12.0,
},
features: {
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'vision',
description: 'Model that can understand and control computer interfaces',
},
{
id: 'claude-sonnet-4-5-20250514',
aliases: ['claude-sonnet-4.5-2025-05-14', 'claude-sonnet-4-5-may-2025'],
provider: 'anthropic',
cost: {
input_per_million: 3.0,
output_per_million: 15.0,
cached_input_per_million: 0.3,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
reasoning_output: true,
max_output_tokens: 64000,
},
class: 'reasoning',
score: 88,
scores: {
monologue: 86,
code: 85,
reasoning: 84,
},
description: 'Claude Sonnet 4.5 - Latest general-purpose model with strong reasoning and text/image support',
},
{
id: 'claude-sonnet-4-5-20250929',
aliases: [
'claude-sonnet-4-5',
'claude-sonnet-4.5',
'claude-sonnet-latest',
'claude-sonnet-4-5-sep-2025',
'claude-sonnet-4.5-2025-09-29',
],
provider: 'anthropic',
cost: {
input_per_million: {
threshold_tokens: 200000,
price_below_threshold_per_million: 3.0,
price_above_threshold_per_million: 6.0,
},
output_per_million: {
threshold_tokens: 200000,
price_below_threshold_per_million: 15.0,
price_above_threshold_per_million: 22.5,
},
cached_input_per_million: {
threshold_tokens: 200000,
price_below_threshold_per_million: 0.3,
price_above_threshold_per_million: 0.6,
},
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
reasoning_output: true,
max_output_tokens: 64000,
},
class: 'reasoning',
score: 88,
description: 'Claude Sonnet 4.5 (Sep 2025 refresh) with tiered pricing at 200k tokens and vision support.',
},
{
id: 'claude-sonnet-4-5-20250514-long-context',
aliases: ['claude-sonnet-4-5-long', 'claude-sonnet-4.5-long'],
provider: 'anthropic',
cost: {
input_per_million: 6.0,
output_per_million: 22.5,
cached_input_per_million: 0.6,
},
features: {
context_length: 1_000_000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
reasoning_output: true,
max_output_tokens: 64000,
},
class: 'reasoning',
score: 88,
description: 'Claude Sonnet 4.5 with 1M token context window - for long-context processing',
},
{
id: 'claude-haiku-4-5-20250514',
aliases: ['claude-haiku-4.5-2025-05-14', 'claude-haiku-4-5-may-2025'],
provider: 'anthropic',
cost: {
input_per_million: 1.0,
output_per_million: 5.0,
cached_input_per_million: 0.1,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 8192,
},
class: 'mini',
score: 74,
scores: {
monologue: 72,
code: 71,
reasoning: 70,
},
description: 'Claude Haiku 4.5 - Fast, cost-effective model with text and image support',
},
{
id: 'claude-haiku-4-5-20251001',
aliases: [
'claude-haiku-4-5',
'claude-haiku-4.5',
'claude-haiku-latest',
'claude-haiku-4-5-oct-2025',
'claude-haiku-4.5-2025-10-01',
],
provider: 'anthropic',
cost: {
input_per_million: 1.0,
output_per_million: 5.0,
cached_input_per_million: 0.1,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 8192,
},
class: 'mini',
score: 74,
description: 'Claude Haiku 4.5 (Oct 2025 refresh) fast tier with vision support.',
},
{
id: 'claude-cli',
provider: 'anthropic',
cost: {
input_per_million: 3.0,
output_per_million: 15.0,
cached_input_per_million: 0.3,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
},
class: 'reasoning',
description: 'Claude accessed via CLI (likely uses latest Sonnet or Haiku model)',
},
{
id: 'claude-opus-4-1-20250805',
aliases: ['claude-opus-4-1', 'claude-opus-4.1', 'claude-4-opus'],
provider: 'anthropic',
cost: {
input_per_million: 15.0,
output_per_million: 75.0,
cached_input_per_million: 1.5,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
reasoning_output: true,
max_output_tokens: 32000,
},
class: 'reasoning',
score: 95,
scores: {
monologue: 94,
code: 94,
reasoning: 93,
},
description: 'Claude Opus 4.1 - Highest intelligence and capability with reasoning support',
},
{
id: 'claude-opus-4-5-20251101',
aliases: [
'claude-opus-4-5',
'claude-opus-4.5',
'claude-opus',
'claude-opus-latest',
'claude-4.5-opus',
],
provider: 'anthropic',
cost: {
input_per_million: 15.0,
output_per_million: 75.0,
cached_input_per_million: 1.5,
},
features: {
context_length: 200000,
input_modality: ['text', 'image'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
reasoning_output: true,
max_output_tokens: 32000,
},
class: 'reasoning',
score: 96,
description: 'Claude Opus 4.5 - Latest Opus model with strongest reasoning, code, and long-form capabilities.',
},
{
id: 'gemini-2.5-pro-preview-06-05',
aliases: ['gemini-2.5-pro', 'gemini-2.5-pro-exp-03-25', 'gemini-2.5-pro-preview-05-06'],
provider: 'google',
cost: {
input_per_million: 1.25,
output_per_million: 10.0,
cached_input_per_million: 0.13,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image', 'video', 'audio'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 65536,
},
class: 'reasoning',
score: 80,
description: 'Paid preview of Gemini 2.5 Pro. State-of-the-art multipurpose model.',
},
{
id: 'gemini-3-flash-preview',
aliases: ['gemini-3-flash'],
provider: 'google',
cost: {
input_per_million: {
text: 0.5,
image: 0.5,
video: 0.5,
audio: 1.0,
},
output_per_million: 3.0,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image', 'video', 'audio'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 65536,
},
class: 'standard',
description: 'Gemini 3 Flash Preview - fast multimodal model with 1M context window.',
},
{
id: 'gemini-2.5-flash',
aliases: [
'gemini-2.5-flash-preview-05-20',
'gemini-2.5-flash-preview-04-17',
],
provider: 'google',
cost: {
input_per_million: {
text: 1.0,
audio: 0.0375,
},
output_per_million: 0.6,
},
features: {
context_length: 1048576,
input_modality: ['text', 'image', 'video', 'audio'],
output_modality: ['text'],
tool_use: true,
streaming: true,
json_output: true,
max_output_tokens: 65536,
},
class: 'reasoning',
score: 75,
scores: {
monologue: 12,
code: 63,
reasoning: 78,
},
description: 'Balanced multimodal model with large context, built for Agents.',
},
{
id: 'gemini-2.5-flash-lite',
aliases: ['gemini-2.5-flash-lite-preview-06-17'],
provider: 'google',