UNPKG

@just-every/ensemble

Version:

LLM provider abstraction layer with unified streaming interface

1,673 lines (1,672 loc) 80.9 kB
import { getExternalModel } from '../utils/external_models.js'; export const MODEL_CLASSES = { standard: { models: [ 'gpt-5.2-chat-latest', 'gemini-3-flash-preview', 'claude-sonnet-4-5-20250929', 'grok-4', ], random: true, }, mini: { models: [ 'gpt-5-nano', 'gemini-2.5-flash-lite', 'claude-haiku-4-5-20251001', 'grok-3-mini', ], random: true, }, reasoning: { models: [ 'gpt-5.2', 'gemini-2.5-pro', 'claude-sonnet-4-5-20250929', 'grok-4-1-fast-reasoning', ], random: true, }, reasoning_high: { models: [ 'gpt-5.2-pro', 'gemini-3.1-pro-preview', 'claude-opus-4-5-20251101', 'grok-4', ], random: true, }, reasoning_mini: { models: [ 'gpt-5-mini', 'gemini-3-flash-preview', 'claude-sonnet-4-5-20250929', 'grok-3-mini', ], random: true, }, monologue: { models: [ 'gpt-5.2-chat-latest', 'gemini-3.1-pro-preview', 'claude-sonnet-4-5-20250929', 'grok-4', ], random: true, }, metacognition: { models: [ 'gpt-5.2', 'gemini-3.1-pro-preview', 'claude-opus-4-5-20251101', 'grok-4', ], random: true, }, code: { models: [ 'gpt-5.1-codex-max', 'gemini-3.1-pro-preview', 'claude-opus-4-5-20251101', 'grok-4', 'qwen3-coder', ], random: true, }, writing: { models: [ 'gpt-5.2-chat-latest', 'gemini-3-flash-preview', 'claude-sonnet-4-5-20250929', 'grok-4', ], random: true, }, summary: { models: [ 'gpt-5-nano', 'gemini-2.5-flash-lite', 'claude-haiku-4-5-20251001', 'grok-3-mini', ], random: true, }, vision: { models: [ 'gpt-5.2', 'gemini-3.1-pro-preview', 'claude-opus-4-5-20251101', 'grok-4', ], random: true, }, vision_mini: { models: [ 'gpt-5-mini', 'gemini-3-flash-preview', 'claude-haiku-4-5-20251001', 'grok-3-mini', ], random: true, }, search: { models: [ 'o3-deep-research', 'perplexity/sonar-deep-research', ], random: true, }, long: { models: [ 'gpt-5.2', 'gemini-3.1-pro-preview', 'claude-opus-4-5-20251101', 'grok-4', ], random: true, description: 'Models with very large context windows for processing long documents', }, image_generation: { models: [ 'gpt-image-1.5', 'gemini-3-pro-image-preview', 'seedream-4', 'luma-photon-1', 'ideogram-3.0', 'midjourney-v7', 'flux-kontext-pro', 'stability-ultra', 'runway-gen4-image', 'recraft-v3', ], }, embedding: { models: [ 'text-embedding-3-large', 'text-embedding-004', ], description: 'Vector embedding models for semantic search and RAG', }, voice: { models: [ 'tts-1-hd', 'eleven_multilingual_v2', 'gemini-2.5-pro-preview-tts', ], description: 'Text-to-Speech models for voice generation', }, transcription: { models: [ 'gpt-4o-transcribe', 'gemini-2.5-flash-native-audio-preview-12-2025', ], description: 'Speech-to-Text models for audio transcription with real-time streaming', }, }; export const MODEL_REGISTRY = [ { id: 'seedream-4', aliases: ['seedream-4.0', 'bytedance/seedream-4', 'byteplus/seedream-4'], provider: 'bytedance', cost: { per_image: 0.03 }, features: { input_modality: ['text', 'image'], output_modality: ['image'] }, class: 'image_generation', description: 'Seedream 4.0 text-to-image via BytePlus ModelArk (OpenAI-compatible Images API).', }, { id: 'flux-kontext-pro', provider: 'fireworks', cost: { per_image: 0.04 }, features: { input_modality: ['text', 'image'], output_modality: ['image'] }, class: 'image_generation', description: 'FLUX.1 Kontext Pro via Fireworks (async workflow with polling).', }, { id: 'stability-ultra', aliases: ['stability-ultra-1', 'stable-image-ultra'], provider: 'stability', cost: { per_image: 0.08 }, features: { input_modality: ['text', 'image'], output_modality: ['image'] }, class: 'image_generation', description: 'Stable Image Ultra (v2beta) – photorealistic, 1MP default.', }, { id: 'runway-gen4-image', provider: 'runway', cost: { per_image: 0.08 }, features: { input_modality: ['text', 'image'], output_modality: ['image'] }, class: 'image_generation', description: 'Runway Gen‑4 Image via official Runway API.', }, { id: 'runway-gen4-image-turbo', provider: 'runway', cost: { per_image: 0.02 }, features: { input_modality: ['text', 'image'], output_modality: ['image'] }, class: 'image_generation', description: 'Runway Gen‑4 Image Turbo via official Runway API.', }, { id: 'flux-pro-1.1', provider: 'fireworks', cost: { per_image: 0.04 }, features: { input_modality: ['text'], output_modality: ['image'] }, class: 'image_generation', description: 'FLUX Pro 1.1 (fast, high quality). Uses Fireworks or FAL fallback.', }, { id: 'flux-schnell', provider: 'fireworks', cost: { per_image: 0.02 }, features: { input_modality: ['text'], output_modality: ['image'] }, class: 'image_generation', description: 'FLUX Schnell (very fast). Uses Fireworks or FAL fallback.', }, { id: 'sd3.5-large', provider: 'stability', cost: { per_image: 0.08 }, features: { input_modality: ['text', 'image'], output_modality: ['image'] }, class: 'image_generation', description: 'Stability SD3.5 Large.', }, { id: 'sd3.5-large-turbo', provider: 'stability', cost: { per_image: 0.10 }, features: { input_modality: ['text', 'image'], output_modality: ['image'] }, class: 'image_generation', description: 'Stability SD3.5 Large Turbo.', }, { id: 'sd3.5-medium', provider: 'stability', cost: { per_image: 0.05 }, features: { input_modality: ['text', 'image'], output_modality: ['image'] }, class: 'image_generation', description: 'Stability SD3.5 Medium.', }, { id: 'sd3.5-flash', provider: 'stability', cost: { per_image: 0.02 }, features: { input_modality: ['text', 'image'], output_modality: ['image'] }, class: 'image_generation', description: 'Stability SD3.5 Flash (fast).', }, { id: 'recraft-v3', provider: 'fal', cost: { per_image: 0.04 }, features: { input_modality: ['text'], output_modality: ['image'] }, class: 'image_generation', description: 'Recraft V3 via FAL.ai (text‑to‑image / vector styles).', }, { id: 'text-embedding-3-small', provider: 'openai', cost: { input_per_million: 0.02, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['embedding'], input_token_limit: 8191, }, embedding: true, dim: 1536, class: 'embedding', description: "OpenAI's small embedding model, good balance of performance and cost", }, { id: 'text-embedding-3-large', provider: 'openai', cost: { input_per_million: 0.13, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['embedding'], input_token_limit: 8191, }, embedding: true, dim: 3072, class: 'embedding', description: "OpenAI's large embedding model, good balance of performance and cost", }, { id: 'gemini-embedding-exp-03-07', provider: 'google', cost: { input_per_million: 0, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['embedding'], input_token_limit: 8191, }, embedding: true, dim: 768, class: 'embedding', description: "Google's experimental embedding model optimized for semantic similarity", }, { id: 'text-embedding-004', provider: 'google', cost: { input_per_million: 0, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['embedding'], input_token_limit: 8191, }, embedding: true, dim: 768, class: 'embedding', description: "Google's stable text embedding model (text-embedding-004)", }, { id: 'meta-llama/llama-4-maverick', provider: 'openrouter', cost: { input_per_million: 0.18, output_per_million: 0.6, }, features: { context_length: 1048576, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 78, scores: { monologue: 72, code: 64, reasoning: 56, }, description: 'Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total).', }, { id: 'meta-llama/llama-4-scout', provider: 'openrouter', cost: { input_per_million: 0.08, output_per_million: 0.3, }, features: { context_length: 327680, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 65, description: 'Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B.', }, { id: 'qwen/qwen3-235b-a22b', provider: 'openrouter', cost: { input_per_million: 0.1, output_per_million: 0.1, }, features: { context_length: 40960, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 83, scores: { monologue: 73, code: 62, reasoning: 57, }, description: 'Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass.', }, { id: 'qwen/qwen-max', provider: 'openrouter', cost: { input_per_million: 1.6, output_per_million: 6.4, }, features: { context_length: 131072, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 80, scores: { monologue: 73, code: 61, reasoning: 57, }, description: 'Qwen-Max, based on Qwen2.5, provides the best inference performance among Qwen models, especially for complex multi-step tasks.', }, { id: 'mistral/ministral-8b', provider: 'openrouter', cost: { input_per_million: 0.1, output_per_million: 0.1, }, features: { context_length: 131072, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 55, description: 'Ministral 8B is a state-of-the-art language model optimized for on-device and edge computing. Designed for efficiency in knowledge-intensive tasks, commonsense reasoning, and function-calling.', }, { id: 'grok-4-1-fast-reasoning', aliases: ['grok-4.1-fast-reasoning'], provider: 'xai', cost: { input_per_million: { threshold_tokens: 128_000, price_below_threshold_per_million: 0.2, price_above_threshold_per_million: 0.5, }, output_per_million: { threshold_tokens: 128_000, price_below_threshold_per_million: 0.5, price_above_threshold_per_million: 1.0, }, cached_input_per_million: 0.05, }, features: { context_length: 2_000_000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 88, scores: { monologue: 90, code: 85, reasoning: 88, }, description: 'Grok 4.1 Fast with extended reasoning. 2M context, tiered pricing at 128k threshold.', }, { id: 'grok-4-1-fast-non-reasoning', aliases: ['grok-4.1-fast-non-reasoning'], provider: 'xai', cost: { input_per_million: { threshold_tokens: 128_000, price_below_threshold_per_million: 0.2, price_above_threshold_per_million: 0.5, }, output_per_million: { threshold_tokens: 128_000, price_below_threshold_per_million: 0.5, price_above_threshold_per_million: 1.0, }, cached_input_per_million: 0.05, }, features: { context_length: 2_000_000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 85, scores: { monologue: 87, code: 82, reasoning: 80, }, description: 'Grok 4.1 Fast without reasoning. 2M context, tiered pricing at 128k threshold.', }, { id: 'grok-4', aliases: ['grok-4-2025-09-01'], provider: 'xai', cost: { input_per_million: 3.0, output_per_million: 15.0, cached_input_per_million: 0.75, }, features: { context_length: 256_000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 90, scores: { monologue: 92, code: 88, reasoning: 85, }, description: 'Grok-4 premium model with 256k context and vision capabilities.', }, { id: 'grok-4-fast-reasoning', aliases: ['grok-4-fast-reasoning-2025-09-01'], provider: 'xai', cost: { input_per_million: { threshold_tokens: 128_000, price_below_threshold_per_million: 0.2, price_above_threshold_per_million: 0.5, }, output_per_million: { threshold_tokens: 128_000, price_below_threshold_per_million: 0.5, price_above_threshold_per_million: 1.0, }, cached_input_per_million: 0.05, }, features: { context_length: 2_000_000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 87, scores: { monologue: 89, code: 84, reasoning: 86, }, description: 'Grok-4 Fast with reasoning (Sep 2025). 2M context, tiered pricing at 128k threshold.', }, { id: 'grok-4-fast-non-reasoning', aliases: ['grok-4-fast-non-reasoning-2025-09-01'], provider: 'xai', cost: { input_per_million: { threshold_tokens: 128_000, price_below_threshold_per_million: 0.2, price_above_threshold_per_million: 0.5, }, output_per_million: { threshold_tokens: 128_000, price_below_threshold_per_million: 0.5, price_above_threshold_per_million: 1.0, }, cached_input_per_million: 0.05, }, features: { context_length: 2_000_000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 84, scores: { monologue: 86, code: 81, reasoning: 79, }, description: 'Grok-4 Fast without reasoning (Sep 2025). 2M context, tiered pricing at 128k threshold.', }, { id: 'grok-3', aliases: ['grok-3-2025-02-11'], provider: 'xai', cost: { input_per_million: 3.0, output_per_million: 15.0, }, features: { context_length: 131_072, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 78, scores: { monologue: 80, code: 70, reasoning: 65, }, description: 'Grok-3 model with 131k context.', }, { id: 'grok-3-mini', aliases: ['grok-3-mini-2025-04-11'], provider: 'xai', cost: { input_per_million: 0.3, output_per_million: 0.5, }, features: { context_length: 131_072, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 60, scores: { monologue: 62, code: 55, reasoning: 50, }, description: 'Grok-3 Mini—budget model with 131k context.', }, { id: 'grok-3-mini-accelerated', aliases: ['grok-3-mini-accelerated-2025-04-11'], provider: 'xai', cost: { input_per_million: 0.6, output_per_million: 4.0, }, features: { context_length: 131_072, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 60, scores: { monologue: 62, code: 55, reasoning: 50, }, description: 'Grok-3 Mini on accelerated hardware for lower latency.', }, { id: 'grok-2', aliases: ['grok-2-text'], provider: 'xai', cost: { input_per_million: 2.0, output_per_million: 10.0, }, features: { context_length: 128_000, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 70, scores: { monologue: 72, code: 65, reasoning: 60, }, description: 'Legacy Grok-2 text-only model.', }, { id: 'grok-2-vision', aliases: ['grok-2-vision'], provider: 'xai', cost: { input_per_million: 2.0, output_per_million: 10.0, }, features: { context_length: 128_000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'vision', score: 70, scores: { monologue: 72, code: 65, reasoning: 60, }, description: 'Legacy Grok-2 model with vision capabilities.', }, { id: 'gpt-4.1', aliases: ['gpt-4.1-2025-04-14'], provider: 'openai', cost: { input_per_million: 2.0, cached_input_per_million: 0.5, output_per_million: 8.0, }, features: { context_length: 1048576, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 82, scores: { monologue: 86, code: 83, reasoning: 71, }, description: 'Flagship GPT model for complex tasks', }, { id: 'gpt-4.1-mini', aliases: ['gpt-4.1-mini-2025-04-14'], provider: 'openai', cost: { input_per_million: 0.4, cached_input_per_million: 0.1, output_per_million: 1.6, }, features: { context_length: 1048576, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 75, description: 'Balanced for intelligence, speed, and cost', }, { id: 'gpt-4.1-nano', aliases: ['gpt-4.1-nano-2025-04-14'], provider: 'openai', cost: { input_per_million: 0.1, cached_input_per_million: 0.025, output_per_million: 0.4, }, features: { context_length: 1048576, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 60, description: 'Fastest, most cost-effective GPT-4.1 model', }, { id: 'gpt-4.5-preview', aliases: ['gpt-4.5-preview-2025-02-27'], provider: 'openai', cost: { input_per_million: 75.0, cached_input_per_million: 37.5, output_per_million: 150.0, }, features: { context_length: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', description: 'Latest premium GPT model from OpenAI', }, { id: 'gpt-5.2', aliases: ['gpt-5.2-2025-12-11'], provider: 'openai', cost: { input_per_million: 1.75, cached_input_per_million: 0.175, output_per_million: 14.0, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 96, scores: { monologue: 97, code: 95, reasoning: 94, }, description: 'Frontier flagship model for complex, multi-step tasks (400k/128k).', }, { id: 'gpt-5.2-pro', aliases: ['gpt-5.2-pro-2025-12-11'], provider: 'openai', cost: { input_per_million: 21.0, output_per_million: 168.0, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 99, description: 'Highest-accuracy GPT-5.2 variant for the hardest problems (400k/128k).', }, { id: 'gpt-5.2-chat-latest', provider: 'openai', cost: { input_per_million: 1.75, cached_input_per_million: 0.175, output_per_million: 14.0, }, features: { context_length: 128000, max_output_tokens: 16384, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 94, description: 'ChatGPT-optimized GPT-5.2 chat model (128k/16k).', }, { id: 'gpt-5', aliases: ['gpt-5-2025-08-07'], provider: 'openai', cost: { input_per_million: 1.25, cached_input_per_million: 0.125, output_per_million: 10.0, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 95, scores: { monologue: 96, code: 95, reasoning: 92, }, description: 'Frontier flagship model for complex, multi-step tasks (400k/128k).', }, { id: 'gpt-5-pro', aliases: ['gpt-5-pro-2025-10-06'], provider: 'openai', cost: { input_per_million: 15.0, output_per_million: 120.0, }, features: { context_length: 400000, max_output_tokens: 272000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 98, description: 'High-accuracy GPT-5 variant for the hardest problems (400k/128k).', }, { id: 'gpt-5-chat-latest', aliases: ['gpt-5-chat'], provider: 'openai', cost: { input_per_million: 1.25, cached_input_per_million: 0.125, output_per_million: 10.0, }, features: { context_length: 128000, max_output_tokens: 16384, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 92, description: 'ChatGPT-optimized GPT-5 chat model (128k/16k).', }, { id: 'gpt-5-mini', aliases: ['gpt-5-mini-2025-08-07'], provider: 'openai', cost: { input_per_million: 0.25, cached_input_per_million: 0.025, output_per_million: 2.0, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 88, scores: { monologue: 88, code: 87, reasoning: 85, }, description: 'A faster, more cost-efficient version of GPT-5 (400k/128k).', }, { id: 'gpt-5-nano', aliases: ['gpt-5-nano-2025-08-07'], provider: 'openai', cost: { input_per_million: 0.05, cached_input_per_million: 0.005, output_per_million: 0.4, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 78, scores: { monologue: 78, code: 76, reasoning: 75, }, description: 'Fastest, most cost-efficient GPT-5 model (400k/128k).', }, { id: 'gpt-5.1', aliases: ['gpt-5.1-2025-11-13'], provider: 'openai', cost: { input_per_million: 1.25, cached_input_per_million: 0.125, output_per_million: 10.0, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 95, description: 'Prior-gen GPT-5.1 flagship model (400k/128k).', }, { id: 'gpt-5.1-chat-latest', provider: 'openai', cost: { input_per_million: 1.25, cached_input_per_million: 0.125, output_per_million: 10.0, }, features: { context_length: 128000, max_output_tokens: 16384, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 91, description: 'ChatGPT-optimized GPT-5.1 chat model (128k/16k).', }, { id: 'gpt-5-codex', aliases: ['gpt-5-codex-2025-09-15'], provider: 'openai', cost: { input_per_million: 1.25, cached_input_per_million: 0.125, output_per_million: 10.0, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'code', score: 90, description: 'Prior-gen Codex model for agentic coding (400k/128k).', }, { id: 'gpt-5.1-codex', provider: 'openai', cost: { input_per_million: 1.25, cached_input_per_million: 0.125, output_per_million: 10.0, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'code', score: 92, description: 'GPT-5.1 Codex — optimized for agentic coding tasks (400k/128k).', }, { id: 'gpt-5.1-codex-mini', provider: 'openai', cost: { input_per_million: 0.25, cached_input_per_million: 0.025, output_per_million: 2.0, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'code', score: 86, description: 'GPT-5.1 Codex Mini — lightweight coding model with 400k context.', }, { id: 'gpt-5.1-codex-max', provider: 'openai', cost: { input_per_million: 1.25, cached_input_per_million: 0.125, output_per_million: 10.0, }, features: { context_length: 400000, max_output_tokens: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'code', score: 95, description: 'GPT-5.1 Codex Max — most capable Codex model for coding agents (400k/128k).', }, { id: 'gpt-4o', aliases: ['gpt-4o-2024-08-06'], provider: 'openai', cost: { input_per_million: 2.5, cached_input_per_million: 1.25, output_per_million: 10.0, }, features: { context_length: 128000, input_modality: ['text', 'image', 'audio'], output_modality: ['text', 'audio'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 80, description: 'OpenAI standard model with multimodal capabilities', }, { id: 'gpt-4o-mini', aliases: ['gpt-4o-mini-2024-07-18'], provider: 'openai', cost: { input_per_million: 0.15, cached_input_per_million: 0.075, output_per_million: 0.6, }, features: { context_length: 128000, input_modality: ['text', 'image', 'audio'], output_modality: ['text', 'audio'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 65, scores: { monologue: 70, code: 63, reasoning: 60, }, description: 'Smaller, faster version of GPT-4o', }, { id: 'o4-mini', aliases: ['o4-mini-2025-04-16', 'o4-mini-low', 'o4-mini-medium', 'o4-mini-high'], provider: 'openai', cost: { input_per_million: 1.1, cached_input_per_million: 0.275, output_per_million: 4.4, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 80, scores: { monologue: 85, code: 82, reasoning: 76, }, description: 'Faster, more affordable reasoning model', }, { id: 'o3', aliases: ['o3-2025-04-16'], provider: 'openai', cost: { input_per_million: 2, cached_input_per_million: 0.5, output_per_million: 8, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 85, scores: { monologue: 87, code: 84, reasoning: 79, }, description: 'Powerful reasoning model', }, { id: 'o3-pro', aliases: ['o3-pro-2025-06-10'], provider: 'openai', cost: { input_per_million: 20, output_per_million: 80, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, json_output: true, }, class: 'reasoning', score: 85, scores: { monologue: 87, code: 84, reasoning: 79, }, description: 'Most powerful reasoning model', }, { id: 'o3-deep-research', aliases: ['o3-deep-research-2025-06-26'], provider: 'openai', cost: { input_per_million: 10.0, cached_input_per_million: 2.5, output_per_million: 40.0, }, features: { context_length: 200000, input_modality: ['text', 'image', 'audio'], output_modality: ['text', 'image', 'audio'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 90, scores: { monologue: 92, code: 89, reasoning: 88, }, description: 'Our most powerful deep research model', }, { id: 'o1', aliases: ['o1-2024-12-17'], provider: 'openai', cost: { input_per_million: 15.0, cached_input_per_million: 7.5, output_per_million: 60.0, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', description: 'Advanced reasoning model from OpenAI', }, { id: 'o1-pro', aliases: ['o1-pro-2025-03-19'], provider: 'openai', cost: { input_per_million: 150.0, output_per_million: 600.0, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: false, json_output: true, }, class: 'reasoning', score: 90, description: 'Premium O-series model from OpenAI, highest reasoning capability', }, { id: 'o4-mini', aliases: ['o4-mini-2025-01-31', 'o1-mini', 'o1-mini-2024-09-12'], provider: 'openai', cost: { input_per_million: 1.1, cached_input_per_million: 0.55, output_per_million: 4.4, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 70, description: 'Smaller O-series model with reasoning capabilities', }, { id: 'computer-use-preview', aliases: ['computer-use-preview-2025-03-11'], provider: 'openai', cost: { input_per_million: 3.0, output_per_million: 12.0, }, features: { input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'vision', description: 'Model that can understand and control computer interfaces', }, { id: 'claude-sonnet-4-5-20250514', aliases: ['claude-sonnet-4.5-2025-05-14', 'claude-sonnet-4-5-may-2025'], provider: 'anthropic', cost: { input_per_million: 3.0, output_per_million: 15.0, cached_input_per_million: 0.3, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, reasoning_output: true, max_output_tokens: 64000, }, class: 'reasoning', score: 88, scores: { monologue: 86, code: 85, reasoning: 84, }, description: 'Claude Sonnet 4.5 - Latest general-purpose model with strong reasoning and text/image support', }, { id: 'claude-sonnet-4-5-20250929', aliases: [ 'claude-sonnet-4-5', 'claude-sonnet-4.5', 'claude-sonnet-latest', 'claude-sonnet-4-5-sep-2025', 'claude-sonnet-4.5-2025-09-29', ], provider: 'anthropic', cost: { input_per_million: { threshold_tokens: 200000, price_below_threshold_per_million: 3.0, price_above_threshold_per_million: 6.0, }, output_per_million: { threshold_tokens: 200000, price_below_threshold_per_million: 15.0, price_above_threshold_per_million: 22.5, }, cached_input_per_million: { threshold_tokens: 200000, price_below_threshold_per_million: 0.3, price_above_threshold_per_million: 0.6, }, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, reasoning_output: true, max_output_tokens: 64000, }, class: 'reasoning', score: 88, description: 'Claude Sonnet 4.5 (Sep 2025 refresh) with tiered pricing at 200k tokens and vision support.', }, { id: 'claude-sonnet-4-5-20250514-long-context', aliases: ['claude-sonnet-4-5-long', 'claude-sonnet-4.5-long'], provider: 'anthropic', cost: { input_per_million: 6.0, output_per_million: 22.5, cached_input_per_million: 0.6, }, features: { context_length: 1_000_000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, reasoning_output: true, max_output_tokens: 64000, }, class: 'reasoning', score: 88, description: 'Claude Sonnet 4.5 with 1M token context window - for long-context processing', }, { id: 'claude-haiku-4-5-20250514', aliases: ['claude-haiku-4.5-2025-05-14', 'claude-haiku-4-5-may-2025'], provider: 'anthropic', cost: { input_per_million: 1.0, output_per_million: 5.0, cached_input_per_million: 0.1, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 8192, }, class: 'mini', score: 74, scores: { monologue: 72, code: 71, reasoning: 70, }, description: 'Claude Haiku 4.5 - Fast, cost-effective model with text and image support', }, { id: 'claude-haiku-4-5-20251001', aliases: [ 'claude-haiku-4-5', 'claude-haiku-4.5', 'claude-haiku-latest', 'claude-haiku-4-5-oct-2025', 'claude-haiku-4.5-2025-10-01', ], provider: 'anthropic', cost: { input_per_million: 1.0, output_per_million: 5.0, cached_input_per_million: 0.1, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 8192, }, class: 'mini', score: 74, description: 'Claude Haiku 4.5 (Oct 2025 refresh) fast tier with vision support.', }, { id: 'claude-cli', provider: 'anthropic', cost: { input_per_million: 3.0, output_per_million: 15.0, cached_input_per_million: 0.3, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', description: 'Claude accessed via CLI (likely uses latest Sonnet or Haiku model)', }, { id: 'claude-opus-4-1-20250805', aliases: ['claude-opus-4-1', 'claude-opus-4.1', 'claude-4-opus'], provider: 'anthropic', cost: { input_per_million: 15.0, output_per_million: 75.0, cached_input_per_million: 1.5, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, reasoning_output: true, max_output_tokens: 32000, }, class: 'reasoning', score: 95, scores: { monologue: 94, code: 94, reasoning: 93, }, description: 'Claude Opus 4.1 - Highest intelligence and capability with reasoning support', }, { id: 'claude-opus-4-5-20251101', aliases: [ 'claude-opus-4-5', 'claude-opus-4.5', 'claude-opus', 'claude-opus-latest', 'claude-4.5-opus', ], provider: 'anthropic', cost: { input_per_million: 15.0, output_per_million: 75.0, cached_input_per_million: 1.5, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, reasoning_output: true, max_output_tokens: 32000, }, class: 'reasoning', score: 96, description: 'Claude Opus 4.5 - Latest Opus model with strongest reasoning, code, and long-form capabilities.', }, { id: 'gemini-2.5-pro-preview-06-05', aliases: ['gemini-2.5-pro', 'gemini-2.5-pro-exp-03-25', 'gemini-2.5-pro-preview-05-06'], provider: 'google', cost: { input_per_million: 1.25, output_per_million: 10.0, cached_input_per_million: 0.13, }, features: { context_length: 1048576, input_modality: ['text', 'image', 'video', 'audio'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 65536, }, class: 'reasoning', score: 80, description: 'Paid preview of Gemini 2.5 Pro. State-of-the-art multipurpose model.', }, { id: 'gemini-3-flash-preview', aliases: ['gemini-3-flash'], provider: 'google', cost: { input_per_million: { text: 0.5, image: 0.5, video: 0.5, audio: 1.0, }, output_per_million: 3.0, }, features: { context_length: 1048576, input_modality: ['text', 'image', 'video', 'audio'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 65536, }, class: 'standard', description: 'Gemini 3 Flash Preview - fast multimodal model with 1M context window.', }, { id: 'gemini-2.5-flash', aliases: [ 'gemini-2.5-flash-preview-05-20', 'gemini-2.5-flash-preview-04-17', ], provider: 'google', cost: { input_per_million: { text: 1.0, audio: 0.0375, }, output_per_million: 0.6, }, features: { context_length: 1048576, input_modality: ['text', 'image', 'video', 'audio'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 65536, }, class: 'reasoning', score: 75, scores: { monologue: 12, code: 63, reasoning: 78, }, description: 'Balanced multimodal model with large context, built for Agents.', }, { id: 'gemini-2.5-flash-lite', aliases: ['gemini-2.5-flash-lite-preview-06-17'], provider: 'google',