UNPKG

@just-every/ensemble

Version: (not captured in this page snapshot)

LLM provider abstraction layer with unified streaming interface

1,550 lines 46.9 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.MODEL_REGISTRY = exports.MODEL_CLASSES = void 0; exports.findModel = findModel; const external_models_js_1 = require("../utils/external_models.cjs"); exports.MODEL_CLASSES = { standard: { models: [ 'gpt-4.1', 'gemini-2.5-flash-preview-05-20-low', 'claude-3-5-haiku-latest', 'grok-3-mini-fast', 'deepseek-chat', ], random: true, }, mini: { models: [ 'gpt-4.1-nano', 'claude-3-5-haiku-latest', 'gemini-2.5-flash-lite-preview-06-17', 'grok-3-mini', 'meta-llama/llama-4-scout', 'mistral/ministral-8b', ], random: true, }, reasoning: { models: [ 'gemini-2.5-pro-preview-06-05', 'gemini-2.5-flash-preview-05-20-max', 'o4-mini-high', 'o3-pro', 'o3-high', 'claude-opus-4-20250514-max', 'claude-sonnet-4-20250514-max', 'claude-3-7-sonnet-latest-max', 'mistralai/magistral-medium-2506:thinking', ], random: true, }, reasoning_mini: { models: [ 'gemini-2.5-flash-preview-05-20-medium', 'grok-3-mini-fast', 'o3-low', ], random: true, }, monologue: { models: [ 'gemini-2.5-pro-preview-06-05', 'gemini-2.5-flash-preview-05-20-medium', 'o4-mini-low', 'o3-medium', 'claude-sonnet-4-20250514-medium', ], random: true, }, metacognition: { models: [ 'gemini-2.5-pro-preview-06-05', 'gemini-2.5-flash-preview-05-20-high', 'o3-high', 'claude-sonnet-4-20250514-medium', 'grok-3-mini-fast', ], random: true, }, code: { models: [ 'codex-mini-latest', 'claude-opus-4-20250514-medium', 'claude-sonnet-4-20250514-max', 'o3-high', 'gemini-2.5-flash-preview-05-20-medium', ], random: true, }, writing: { models: [ 'gemini-2.5-flash-lite-preview-06-17', 'gpt-4.1-mini', ], random: true, }, summary: { models: [ 'gemini-2.5-flash-lite-preview-06-17', 'gpt-4.1-mini', ], random: true, }, vision: { models: [ 'o4-mini-medium', 'o3-medium', 'gemini-2.5-flash-preview-05-20-max', 'gemini-2.5-pro-preview-06-05', 'claude-opus-4-20250514-low', 'claude-sonnet-4-20250514-max', ], random: true, }, vision_mini: { models: [ 'gpt-4.1', 'o3-low', 
'o4-mini-low', 'gemini-2.5-flash-lite-preview-06-17', 'gemini-2.5-flash-preview-05-20', 'claude-sonnet-4-20250514-low', ], random: true, }, search: { models: [ 'gpt-4.1', 'deepseek-reasoner', 'gemini-2.5-flash-lite-preview-06-17', 'perplexity/sonar-deep-research', ], random: true, }, long: { models: [ 'gpt-4.1', 'gpt-4.1-nano', 'gpt-4.1-mini', 'gemini-2.5-pro-preview-06-05', 'gemini-2.5-flash-preview-05-20-medium', 'gemini-2.5-flash-preview-05-20-low', 'gemini-2.5-flash-lite-preview-06-17', ], random: true, description: 'Models with very large context windows (near 1M tokens) for processing long documents', }, image_generation: { models: [ 'gpt-image-1', 'imagen-3.0-generate-002', ], }, embedding: { models: [ 'text-embedding-3-small', 'gemini-embedding-exp-03-07', ], description: 'Vector embedding models for semantic search and RAG', }, voice: { models: [ 'gpt-4o-mini-tts', 'tts-1', 'tts-1-hd', 'eleven_multilingual_v2', 'eleven_turbo_v2_5', 'eleven_flash_v2_5', 'gemini-2.5-flash-preview-tts', 'gemini-2.5-pro-preview-tts', ], description: 'Text-to-Speech models for voice generation', }, transcription: { models: [ 'gemini-2.0-flash-live-001', ], description: 'Speech-to-Text models for audio transcription with real-time streaming', }, }; exports.MODEL_REGISTRY = [ { id: 'text-embedding-3-small', provider: 'openai', cost: { input_per_million: 0.02, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['embedding'], input_token_limit: 8191, }, embedding: true, dim: 1536, class: 'embedding', description: "OpenAI's small embedding model, good balance of performance and cost", }, { id: 'text-embedding-3-large', provider: 'openai', cost: { input_per_million: 0.13, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['embedding'], input_token_limit: 8191, }, embedding: true, dim: 3072, class: 'embedding', description: "OpenAI's large embedding model, good balance of performance and cost", }, { id: 
'gemini-embedding-exp-03-07', provider: 'google', cost: { input_per_million: 0, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['embedding'], input_token_limit: 8191, }, embedding: true, dim: 768, class: 'embedding', description: "Google's experimental embedding model optimized for semantic similarity", }, { id: 'meta-llama/llama-4-maverick', provider: 'openrouter', cost: { input_per_million: 0.18, output_per_million: 0.6, }, features: { context_length: 1048576, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 78, scores: { monologue: 72, code: 64, reasoning: 56, }, description: 'Llama 4 Maverick 17B Instruct (128E) is a high-capacity multimodal language model from Meta, built on a mixture-of-experts (MoE) architecture with 128 experts and 17 billion active parameters per forward pass (400B total).', }, { id: 'meta-llama/llama-4-scout', provider: 'openrouter', cost: { input_per_million: 0.08, output_per_million: 0.3, }, features: { context_length: 327680, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 65, description: 'Llama 4 Scout 17B Instruct (16E) is a mixture-of-experts (MoE) language model developed by Meta, activating 17 billion parameters out of a total of 109B.', }, { id: 'qwen/qwen3-235b-a22b', provider: 'openrouter', cost: { input_per_million: 0.1, output_per_million: 0.1, }, features: { context_length: 40960, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 83, scores: { monologue: 73, code: 62, reasoning: 57, }, description: 'Qwen3-235B-A22B is a 235B parameter mixture-of-experts (MoE) model developed by Qwen, activating 22B parameters per forward pass.', }, { id: 'qwen/qwen-max', provider: 'openrouter', cost: { input_per_million: 1.6, output_per_million: 6.4, 
}, features: { context_length: 131072, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 80, scores: { monologue: 73, code: 61, reasoning: 57, }, description: 'Qwen-Max, based on Qwen2.5, provides the best inference performance among Qwen models, especially for complex multi-step tasks.', }, { id: 'mistral/ministral-8b', provider: 'openrouter', cost: { input_per_million: 0.1, output_per_million: 0.1, }, features: { context_length: 131072, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 55, description: 'Ministral 8B is a state-of-the-art language model optimized for on-device and edge computing. Designed for efficiency in knowledge-intensive tasks, commonsense reasoning, and function-calling.', }, { id: 'grok-3', aliases: ['grok-3-2025-02-11'], provider: 'xai', cost: { input_per_million: 3.0, output_per_million: 15.0, }, features: { context_length: 131_072, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 78, scores: { monologue: 80, code: 70, reasoning: 65, }, description: 'Flagship Grok-3 model for complex reasoning and generation', }, { id: 'grok-3-fast', aliases: ['grok-3-fast-2025-04-11'], provider: 'xai', cost: { input_per_million: 5.0, output_per_million: 25.0, }, features: { context_length: 131_072, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 78, scores: { monologue: 80, code: 70, reasoning: 65, }, description: 'Same Grok-3 weights on premium infra for lower latency', }, { id: 'grok-3-mini', aliases: ['grok-3-mini-2025-04-11'], provider: 'xai', cost: { input_per_million: 0.3, output_per_million: 0.5, }, features: { context_length: 131_072, input_modality: ['text', 'image'], output_modality: 
['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 60, scores: { monologue: 62, code: 55, reasoning: 50, }, description: 'Lightweight Grok-3 Mini—budget model for logic tasks', }, { id: 'grok-3-mini-fast', aliases: ['grok-3-mini-fast-2025-04-11'], provider: 'xai', cost: { input_per_million: 0.6, output_per_million: 4.0, }, features: { context_length: 131_072, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 60, scores: { monologue: 62, code: 55, reasoning: 50, }, description: 'Grok-3 Mini on accelerated hardware for latency-critical use', }, { id: 'gpt-4.1', aliases: ['gpt-4.1-2025-04-14'], provider: 'openai', cost: { input_per_million: 2.0, cached_input_per_million: 0.5, output_per_million: 8.0, }, features: { context_length: 1048576, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 82, scores: { monologue: 86, code: 83, reasoning: 71, }, description: 'Flagship GPT model for complex tasks', }, { id: 'gpt-4.1-mini', aliases: ['gpt-4.1-mini-2025-04-14'], provider: 'openai', cost: { input_per_million: 0.4, cached_input_per_million: 0.1, output_per_million: 1.6, }, features: { context_length: 1048576, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 75, description: 'Balanced for intelligence, speed, and cost', }, { id: 'gpt-4.1-nano', aliases: ['gpt-4.1-nano-2025-04-14'], provider: 'openai', cost: { input_per_million: 0.1, cached_input_per_million: 0.025, output_per_million: 0.4, }, features: { context_length: 1048576, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 60, description: 'Fastest, most cost-effective GPT-4.1 model', }, { id: 'gpt-4.5-preview', aliases: 
['gpt-4.5-preview-2025-02-27'], provider: 'openai', cost: { input_per_million: 75.0, cached_input_per_million: 37.5, output_per_million: 150.0, }, features: { context_length: 128000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', description: 'Latest premium GPT model from OpenAI', }, { id: 'gpt-4o', aliases: ['gpt-4o-2024-08-06'], provider: 'openai', cost: { input_per_million: 2.5, cached_input_per_million: 1.25, output_per_million: 10.0, }, features: { context_length: 128000, input_modality: ['text', 'image', 'audio'], output_modality: ['text', 'audio'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 80, description: 'OpenAI standard model with multimodal capabilities', }, { id: 'gpt-4o-mini', aliases: ['gpt-4o-mini-2024-07-18'], provider: 'openai', cost: { input_per_million: 0.15, cached_input_per_million: 0.075, output_per_million: 0.6, }, features: { context_length: 128000, input_modality: ['text', 'image', 'audio'], output_modality: ['text', 'audio'], tool_use: true, streaming: true, json_output: true, }, class: 'mini', score: 65, scores: { monologue: 70, code: 63, reasoning: 60, }, description: 'Smaller, faster version of GPT-4o', }, { id: 'o4-mini', aliases: ['o4-mini-2025-04-16', 'o4-mini-low', 'o4-mini-medium', 'o4-mini-high'], provider: 'openai', cost: { input_per_million: 1.1, cached_input_per_million: 0.275, output_per_million: 4.4, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 80, scores: { monologue: 85, code: 82, reasoning: 76, }, description: 'Faster, more affordable reasoning model', }, { id: 'o3', aliases: ['o3-2025-04-16'], provider: 'openai', cost: { input_per_million: 2, cached_input_per_million: 0.5, output_per_million: 8, }, features: { context_length: 200000, input_modality: ['text', 
'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 85, scores: { monologue: 87, code: 84, reasoning: 79, }, description: 'Powerful reasoning model', }, { id: 'o3-pro', aliases: ['o3-pro-2025-06-10'], provider: 'openai', cost: { input_per_million: 20, output_per_million: 80, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, json_output: true, }, class: 'reasoning', score: 85, scores: { monologue: 87, code: 84, reasoning: 79, }, description: 'Most powerful reasoning model', }, { id: 'o1', aliases: ['o1-2024-12-17'], provider: 'openai', cost: { input_per_million: 15.0, cached_input_per_million: 7.5, output_per_million: 60.0, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', description: 'Advanced reasoning model from OpenAI', }, { id: 'o1-pro', aliases: ['o1-pro-2025-03-19'], provider: 'openai', cost: { input_per_million: 150.0, output_per_million: 600.0, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: false, json_output: true, }, class: 'reasoning', score: 90, description: 'Premium O-series model from OpenAI, highest reasoning capability', }, { id: 'o4-mini', aliases: ['o4-mini-2025-01-31', 'o1-mini', 'o1-mini-2024-09-12'], provider: 'openai', cost: { input_per_million: 1.1, cached_input_per_million: 0.55, output_per_million: 4.4, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', score: 70, description: 'Smaller O-series model with reasoning capabilities', }, { id: 'computer-use-preview', aliases: ['computer-use-preview-2025-03-11'], provider: 'openai', cost: { input_per_million: 3.0, output_per_million: 
12.0, }, features: { input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'vision', description: 'Model that can understand and control computer interfaces', }, { id: 'claude-3-7-sonnet-latest', aliases: ['claude-3-7-sonnet'], provider: 'anthropic', cost: { input_per_million: 3.0, output_per_million: 15.0, cached_input_per_million: 0.3, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 64000, }, class: 'reasoning', score: 85, scores: { monologue: 83, code: 77, reasoning: 69, }, description: 'Latest Claude model with strong reasoning capabilities (extended thinking internal)', }, { id: 'claude-3-5-haiku-latest', aliases: ['claude-3-5-haiku'], provider: 'anthropic', cost: { input_per_million: 0.8, output_per_million: 4.0, cached_input_per_million: 0.08, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 8192, }, class: 'mini', score: 70, scores: { monologue: 66, code: 63, reasoning: 55, }, description: 'Fast, cost-effective Claude model', }, { id: 'claude-cli', provider: 'anthropic', cost: { input_per_million: 3.0, output_per_million: 15.0, cached_input_per_million: 0.3, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', description: 'Claude accessed via CLI (likely uses latest Sonnet or Haiku model)', }, { id: 'claude-opus-4-20250514', aliases: ['claude-opus-4', 'claude-4-opus'], provider: 'anthropic', cost: { input_per_million: 15.0, output_per_million: 75.0, cached_input_per_million: 1.5, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: 
true, max_output_tokens: 32000, reasoning_output: true, }, class: 'reasoning', score: 95, description: 'Claude Opus 4 - Highest level of intelligence and capability with extended thinking', }, { id: 'claude-sonnet-4-20250514', aliases: ['claude-sonnet-4', 'claude-4-sonnet'], provider: 'anthropic', cost: { input_per_million: 3.0, output_per_million: 15.0, cached_input_per_million: 0.3, }, features: { context_length: 200000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 64000, reasoning_output: true, }, class: 'reasoning', score: 90, description: 'Claude Sonnet 4 - High intelligence and balanced performance with extended thinking', }, { id: 'gemini-2.5-pro-preview-06-05', aliases: ['gemini-2.5-pro', 'gemini-2.5-pro-exp-03-25', 'gemini-2.5-pro-preview-05-06'], provider: 'google', cost: { input_per_million: { threshold_tokens: 200000, price_below_threshold_per_million: 1.25, price_above_threshold_per_million: 2.5, }, output_per_million: { threshold_tokens: 200000, price_below_threshold_per_million: 10.0, price_above_threshold_per_million: 15.0, }, }, features: { context_length: 1048576, input_modality: ['text', 'image', 'video', 'audio'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 65536, }, class: 'reasoning', score: 80, description: 'Paid preview of Gemini 2.5 Pro. 
State-of-the-art multipurpose model.', }, { id: 'gemini-2.5-flash-preview-05-20', aliases: ['gemini-2.5-flash', 'gemini-2.5-flash-preview-04-17'], provider: 'google', cost: { input_per_million: 0.3, output_per_million: 2.5, }, features: { context_length: 1048576, input_modality: ['text', 'image', 'video', 'audio'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 65536, }, class: 'reasoning', score: 75, scores: { monologue: 12, code: 63, reasoning: 78, }, description: 'Balanced multimodal model with large context, built for Agents.', }, { id: 'gemini-2.5-flash-lite-preview-06-17', aliases: ['gemini-2.5-flash-lite'], provider: 'google', cost: { input_per_million: 0.1, output_per_million: 0.4, }, features: { context_length: 1000000, input_modality: ['text', 'image', 'video', 'audio'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 64000, }, class: 'reasoning_mini', score: 75, scores: { monologue: 12, code: 63, reasoning: 78, }, description: 'Balanced multimodal model with large context, built for Agents.', }, { id: 'gemini-2.0-flash-lite', provider: 'google', cost: { input_per_million: 0.075, output_per_million: 0.3, }, features: { context_length: 1048576, input_modality: ['text', 'image', 'video', 'audio'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 8192, }, class: 'standard', score: 75, scores: { monologue: 70, code: 55, reasoning: 56, }, description: 'Lite multimodal model with large context, built for Agents.', }, { id: 'gemini-2.0-flash', provider: 'google', cost: { input_per_million: 0.1, output_per_million: 0.4, cached_input_per_million: 0.025, }, features: { context_length: 1048576, input_modality: ['text', 'image', 'video', 'audio'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, max_output_tokens: 8192, }, class: 'standard', score: 75, scores: { monologue: 70, code: 55, 
reasoning: 56, }, description: 'Balanced multimodal model with large context, built for Agents.', }, { id: 'gpt-image-1', provider: 'openai', cost: { per_image: 0.042, }, features: { input_modality: ['text', 'image'], output_modality: ['image'], streaming: false, }, class: 'image_generation', description: "OpenAI's GPT-Image-1 model for text-to-image generation. Supports quality levels (low: $0.011-0.016, medium: $0.042-0.063, high: $0.167-0.25) and sizes (1024x1024, 1024x1536, 1536x1024).", }, { id: 'gpt-4o-mini-tts', provider: 'openai', cost: { input_per_million: 0.6, output_per_million: 12.0, }, features: { input_modality: ['text'], output_modality: ['audio'], streaming: true, }, class: 'voice', description: "OpenAI's advanced text-to-speech model with natural-sounding output. Supports customizable tone, style, and emotion through instructions. 85% cheaper than ElevenLabs with estimated $0.015/minute of audio.", }, { id: 'tts-1', provider: 'openai', cost: { input_per_million: 15.0, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['audio'], streaming: true, }, class: 'voice', description: "OpenAI's standard text-to-speech model, optimized for real-time use. Supports 6 voices and multiple audio formats.", }, { id: 'tts-1-hd', provider: 'openai', cost: { input_per_million: 30.0, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['audio'], streaming: true, }, class: 'voice', description: "OpenAI's high-definition text-to-speech model for superior audio quality. 
Supports 6 voices and multiple audio formats.", }, { id: 'eleven_multilingual_v2', provider: 'elevenlabs', cost: { input_per_million: 55, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['audio'], streaming: true, }, class: 'voice', description: "ElevenLabs' multilingual high quality text-to-speech model supporting 29 languages with natural voice capabilities.", }, { id: 'eleven_turbo_v2_5', provider: 'elevenlabs', cost: { input_per_million: 27.5, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['audio'], streaming: true, }, class: 'voice', description: "ElevenLabs' turbo model optimized for low-latency text-to-speech with high quality output.", }, { id: 'eleven_flash_v2_5', provider: 'elevenlabs', cost: { input_per_million: 27.5, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['audio'], streaming: true, }, class: 'voice', description: "ElevenLabs' fastest model optimized for ultra low-latency text-to-speech.", }, { id: 'gemini-2.5-flash-preview-tts', provider: 'google', cost: { input_per_million: 10.0, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['audio'], streaming: true, context_length: 32000, }, class: 'voice', description: "Gemini's fast text-to-speech model with support for 24 languages and 30 distinct voices. 
Optimized for low-latency applications.", }, { id: 'gemini-2.5-pro-preview-tts', provider: 'google', cost: { input_per_million: 20.0, output_per_million: 0, }, features: { input_modality: ['text'], output_modality: ['audio'], streaming: true, context_length: 32000, }, class: 'voice', description: "Gemini's advanced text-to-speech model with superior voice quality, expression control, and multi-speaker support for creating dynamic conversations.", }, { id: 'codex-mini-latest', provider: 'openai', cost: { input_per_million: 1.5, cached_input_per_million: 0.375, output_per_million: 6.0, }, features: { context_length: 200000, max_output_tokens: 100000, input_modality: ['text', 'image'], output_modality: ['text'], tool_use: false, streaming: true, json_output: true, reasoning_output: true, }, class: 'code', description: 'Fine-tuned o4-mini model for Codex CLI with reasoning token support', }, { id: 'perplexity/sonar', provider: 'openrouter', cost: { input_per_million: 1.0, output_per_million: 1.0, }, features: { context_length: 32768, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', description: 'Lightweight, cost-effective search model designed for quick, grounded answers.', }, { id: 'perplexity/sonar-pro', provider: 'openrouter', cost: { input_per_million: 3.0, output_per_million: 15.0, }, features: { context_length: 32768, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', description: 'Advanced search model optimized for complex queries and deeper content understanding.', }, { id: 'perplexity/sonar-reasoning', provider: 'openrouter', cost: { input_per_million: 1.0, output_per_million: 5.0, }, features: { context_length: 32768, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning_mini', description: 'Quick problem-solving and reasoning model, 
ideal for evaluating complex queries.', }, { id: 'perplexity/sonar-reasoning-pro', provider: 'openrouter', cost: { input_per_million: 2.0, output_per_million: 8.0, }, features: { context_length: 32768, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning', description: 'Enhanced reasoning model with multi-step problem-solving capabilities and real-time search.', }, { id: 'perplexity/sonar-deep-research', provider: 'openrouter', cost: { input_per_million: 2.0, output_per_million: 8.0, }, features: { context_length: 32768, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', description: 'Best suited for exhaustive research, generating detailed reports and in-depth insights.', }, { id: 'mistralai/magistral-small-2506', provider: 'openrouter', cost: { input_per_million: 0.5, output_per_million: 1.5, }, features: { context_length: 40000, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'reasoning_mini', score: 72, description: 'Magistral Small is a 24B parameter instruction-tuned model based on Mistral-Small-3.1 (2503), enhanced through supervised fine-tuning on traces from Magistral Medium and further refined via reinforcement learning. It is optimized for reasoning and supports a wide multilingual range, including over 20 languages.', }, { id: 'mistralai/magistral-medium-2506:thinking', provider: 'openrouter', cost: { input_per_million: 2.0, output_per_million: 5.0, }, features: { context_length: 40960, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, reasoning_output: true, }, class: 'reasoning', score: 80, description: "Magistral is Mistral's first reasoning model. It is ideal for general purpose use requiring longer thought processing and better accuracy than with non-reasoning LLMs. 
From legal research and financial forecasting to software development and creative storytelling — this model solves multi-step challenges where transparency and precision are critical.", }, { id: 'test-model', provider: 'test', cost: { input_per_million: 0, output_per_million: 0, }, features: { context_length: 8192, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', scores: { monologue: 50, code: 50, reasoning: 50, }, description: 'Test model for unit testing purposes', }, { id: 'dall-e-3', provider: 'openai', cost: { per_image: 0.04, }, features: { input_modality: ['text'], output_modality: ['image'], }, class: 'image_generation', description: "OpenAI's DALL-E 3 model for high-quality image generation", }, { id: 'dall-e-2', provider: 'openai', cost: { per_image: 0.02, }, features: { input_modality: ['text', 'image'], output_modality: ['image'], }, class: 'image_generation', description: "OpenAI's DALL-E 2 model, supports image editing and variations", }, { id: 'imagen-3.0-generate-002', aliases: ['imagen-3'], provider: 'google', cost: { per_image: 0.04, }, features: { input_modality: ['text'], output_modality: ['image'], }, class: 'image_generation', description: "Google's Imagen 3 model for high-quality image generation", }, { id: 'imagen-2', provider: 'google', cost: { per_image: 0.02, }, features: { input_modality: ['text'], output_modality: ['image'], }, class: 'image_generation', description: "Google's Imagen 2 model for image generation", }, { id: 'gemini-live-2.5-flash-preview', provider: 'google', cost: { input_per_million: { text: 0.5, audio: 3.0, video: 3.0, }, output_per_million: { text: 2.0, audio: 12.0, }, }, features: { context_length: 32000, input_modality: ['text', 'audio', 'video'], output_modality: ['text', 'audio'], streaming: true, }, class: 'transcription', description: 'Gemini Live API for real-time multimodal interaction with modality-specific pricing', }, { id: 
'gemini-2.0-flash-live-001', provider: 'google', cost: { input_per_million: { text: 0.35, audio: 2.1, video: 2.1, }, output_per_million: { text: 1.5, audio: 8.5, }, }, features: { context_length: 32000, input_modality: ['text', 'audio', 'video'], output_modality: ['text', 'audio'], streaming: true, }, class: 'transcription', description: 'Gemini 2.0 Flash Live API for real-time multimodal interaction', }, { id: 'gpt-4o-transcribe', provider: 'openai', cost: { input_per_million: { audio: 6.0, }, output_per_million: { text: 0, }, }, features: { context_length: 128000, input_modality: ['audio'], output_modality: ['text'], streaming: true, }, class: 'transcription', description: 'GPT-4o transcription with incremental streaming output', }, { id: 'gpt-4o-mini-transcribe', provider: 'openai', cost: { input_per_million: { audio: 6.0, }, output_per_million: { text: 0, }, }, features: { context_length: 128000, input_modality: ['audio'], output_modality: ['text'], streaming: true, }, class: 'transcription', description: 'GPT-4o Mini transcription with incremental streaming output', }, { id: 'whisper-1', provider: 'openai', cost: { input_per_million: { audio: 6.0, }, output_per_million: { text: 0, }, }, features: { context_length: 25600, input_modality: ['audio'], output_modality: ['text'], streaming: true, }, class: 'transcription', description: 'OpenAI Whisper transcription with full-turn output', }, { id: 'deepseek-chat', aliases: ['deepseek-v3-0324'], provider: 'deepseek', cost: { input_per_million: { peak_utc_start_hour: 0, peak_utc_start_minute: 30, peak_utc_end_hour: 16, peak_utc_end_minute: 30, peak_price_per_million: 0.27, off_peak_price_per_million: 0.135, }, cached_input_per_million: { peak_utc_start_hour: 0, peak_utc_start_minute: 30, peak_utc_end_hour: 16, peak_utc_end_minute: 30, peak_price_per_million: 0.07, off_peak_price_per_million: 0.035, }, output_per_million: { peak_utc_start_hour: 0, peak_utc_start_minute: 30, peak_utc_end_hour: 16, peak_utc_end_minute: 
30, peak_price_per_million: 1.1, off_peak_price_per_million: 0.55, }, }, features: { context_length: 64000, max_output_tokens: 8192, input_modality: ['text'], output_modality: ['text'], tool_use: true, streaming: true, json_output: true, }, class: 'standard', score: 75, description: 'DeepSeek-V3 chat model with FIM completion support and time-based pricing', }, { id: 'deepseek-reasoner', aliases: ['deepseek-r1-0528'], provider: 'deepseek', cost: { input_per_million: { peak_utc_start_hour: 0, peak_utc_start_minute: 30, peak_utc_end_hour: 16, peak_utc_end_minute: 30, peak_price_per_million: 0.55, off_peak_price_per_million: 0.1375, }, cached_input_per_million: { peak_utc_start_hour: 0, peak_utc_start_minute: 30, peak_utc_end_hour: 16, peak_utc_end_minute: 30, peak_price_per_million: 0.14, off_peak_price_per_million: 0.035, }, output_per_million: { peak_utc_start_hour: 0, peak_utc_start_minute: 30, peak_utc_end_hour: 16, peak_utc_end_minute: 30, peak_price_per_million: 2.19, off_peak_price_per_million: 0.5475, }, }, features: { context_length: 64000, max_output_tokens: 64000, input_modality: ['text'], output_modality: ['text'], tool_use: true, simulate_tools: true, streaming: true, json_output: true, reasoning_output: true, }, class: 'reasoning', score: 85, description: 'DeepSeek-R1 advanced reasoning model with extended output and time-based pricing', }, ]; function findModel(modelId) { const externalModel = (0, external_models_js_1.getExternalModel)(modelId); if (externalModel) return externalModel; const directMatch = exports.MODEL_REGISTRY.find(model => model.id === modelId); if (directMatch) return directMatch; const aliasMatch = exports.MODEL_REGISTRY.find(model => model.aliases?.includes(modelId)); if (aliasMatch) return aliasMatch; const suffixes = ['-low', '-medium', '-high', '-max']; for (const suffix of suffixes) { if (modelId.endsWith(suffix)) { const baseName = modelId.slice(0, -suffix.length); return findModel(baseName); } } return undefined; } //# 
sourceMappingURL=model_data.js.map