UNPKG

modelmix

Version:

🧬 Reliable interface with automatic fallback for AI LLMs.

1,181 lines (1,041 loc) • 108 kB
const fs = require('fs'); const fileType = require('file-type'); const detectFileTypeFromBuffer = fileType.fileTypeFromBuffer || fileType.fromBuffer; const { inspect } = require('util'); const log = require('lemonlog')('ModelMix'); const Bottleneck = require('bottleneck'); const path = require('path'); const WebSocket = require('ws'); const generateJsonSchema = require('./schema'); const { Client } = require("@modelcontextprotocol/sdk/client/index.js"); const { StdioClientTransport } = require("@modelcontextprotocol/sdk/client/stdio.js"); const { MCPToolsManager } = require('./mcp-tools'); const { stripContentTypeHeader, createMultipartFormData, buildRequestBodyAndHeaders } = require('./multipart'); const { fetchJsonResponse, fetchBinaryResponse, fetchStreamResponse } = require('./http-client'); const DEFAULT_RETRYABLE_STATUS_CODES = [408, 425, 429, 500, 502, 503, 504, 529]; function getErrorStatusCode(error) { return error?.statusCode ?? error?.response?.status ?? error?.response?.statusCode ?? null; } function sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } // Pricing per 1M tokens: [input, output] in USD // Based on provider pricing pages linked in README const MODEL_PRICING = { // OpenAI 'gpt-realtime-mini': [0.60, 2.40], 'gpt-realtime': [4.00, 16.00], 'gpt-5.5-pro': [30.00, 180.00], 'gpt-5.5': [5.00, 30.00], 'gpt-5.4': [2.50, 15.00], 'gpt-5.4-pro': [30, 180.00], 'gpt-5.4-mini': [0.75, 4.50], 'gpt-5.4-nano': [0.20, 1.25], 'gpt-5.3-codex': [1.75, 14.00], 'gpt-5.2': [1.75, 14.00], 'gpt-5.2-chat-latest': [1.75, 14.00], 'gpt-5.1': [1.25, 10.00], 'gpt-5': [1.25, 10.00], 'gpt-5-mini': [0.25, 2.00], 'gpt-5-nano': [0.05, 0.40], 'gpt-4.1': [2.00, 8.00], 'gpt-4.1-mini': [0.40, 1.60], 'gpt-4.1-nano': [0.10, 0.40], // gptOss (Together/Groq/Cerebras/OpenRouter) 'openai/gpt-oss-120b': [0.15, 0.60], 'gpt-oss-120b': [0.15, 0.60], 'openai/gpt-oss-120b:free': [0, 0], // Anthropic 'claude-opus-4-7': [5.00, 25.00], 'claude-opus-4-6': [5.00, 25.00], 'claude-opus-4-5-20251101': [5.00, 25.00], 'claude-opus-4-1-20250805': [15.00, 75.00], 'claude-sonnet-4-6': [3.00, 15.00], 'claude-sonnet-4-5-20250929': [3.00, 15.00], 'claude-sonnet-4-20250514': [3.00, 15.00], 'claude-3-5-haiku-20241022': [0.80, 4.00], 'claude-haiku-4-5-20251001': [1.00, 5.00], // Google 'gemini-3.1-pro-preview':[2.00, 12.00], 'gemini-3-pro-preview': [2.00, 12.00], 'gemini-3-flash-preview': [0.50, 3.00], 'gemini-2.5-pro': [1.25, 10.00], 'gemini-2.5-flash': [0.30, 2.50], 'gemini-3.1-flash-lite-preview': [0.25, 1.50], // Grok 'grok-4.3': [1.25, 2.50], 'grok-4.20-multi-agent-0309': [1.25, 2.50], 'grok-4.20-0309-reasoning': [1.25, 2.50], 'grok-4.20-0309-non-reasoning': [1.25, 2.50], 'grok-4-1-fast-reasoning': [0.20, 0.50], 'grok-4-1-fast-non-reasoning': [0.20, 0.50], // Fireworks 'accounts/fireworks/models/deepseek-v3p2': [0.56, 1.68], 'accounts/fireworks/models/deepseek-v4-pro': [1.74, 3.48], 'deepseek-ai/DeepSeek-V4-Pro': [2.10, 4.40], 'accounts/fireworks/models/glm-4p7': [0.55, 2.19], 'accounts/fireworks/models/glm-5p1': [1.05, 3.50], 'accounts/fireworks/models/kimi-k2p5': [0.50, 2.80], 'accounts/fireworks/models/qwen3p6-plus': [0.50, 3.00], 'Qwen/Qwen3.6-Plus': [0.50, 3.00], 'fireworks/glm-5': [1.00, 3.20], // MiniMax 'MiniMax-M2.5': [0.30, 1.20], 'MiniMax-M2.7': [0.30, 1.20], 'fireworks/minimax-m2p5': [0.30, 1.20], 'minimax/minimax-m2.7': [0.30, 1.20], // Perplexity 'sonar': [1.00, 1.00], 'sonar-pro': [3.00, 15.00], // Scout (Groq/Together/Cerebras) 'meta-llama/llama-4-scout-17b-16e-instruct': [0.11, 0.34], 'meta-llama/Llama-4-Scout-17B-16E-Instruct': [0.11, 0.34], 'llama-4-scout-17b-16e-instruct': [0.11, 0.34], // Maverick (Groq/Together/Lambda) 'meta-llama/llama-4-maverick-17b-128e-instruct': [0.20, 0.60], 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8': [0.20, 0.60], 'llama-4-maverick-17b-128e-instruct-fp8': [0.20, 0.60], // Hermes3 (Lambda/OpenRouter) 'Hermes-3-Llama-3.1-405B-FP8': [0.80, 0.80], 'nousresearch/hermes-3-llama-3.1-405b:free': [0, 0], // Qwen3 (Together/Cerebras) 'Qwen/Qwen3-235B-A22B-fp8-tput': [0.20, 0.60], 'qwen-3-32b': [0.20, 0.60], // Kimi K2.5 (Together/Fireworks/OpenRouter) 'moonshotai/Kimi-K2.5': [0.50, 2.80], 'moonshotai/kimi-k2.5': [0.50, 2.80], // DeepSeek V3.2 (OpenRouter) 'deepseek/deepseek-v3.2': [0.56, 1.68], // GLM 4.7 (OpenRouter/Cerebras) 'z-ai/glm-4.7': [0.55, 2.19], 'zai-glm-4.7': [0.55, 2.19], // DeepSeek R1 (OpenRouter free) 'deepseek/deepseek-r1-0528:free': [0, 0], }; class ModelMix { constructor({ options = {}, config = {}, mix = {} } = {}) { this.models = []; this.messages = []; this.tools = {}; this.toolClient = {}; this.mcp = {}; this.mcpToolsManager = new MCPToolsManager(); this.lastRaw = null; this.options = { max_tokens: 8192, temperature: 1, // 1 --> More creative, 0 --> More deterministic. ...options }; // Standard Bottleneck configuration const defaultBottleneckConfig = { maxConcurrent: 8, // Maximum number of concurrent requests minTime: 500, // Minimum time between requests (in ms) }; this.config = { system: 'You are an assistant.', max_history: 0, // 0=no history (stateless), N=keep last N messages, -1=unlimited debug: 0, // 0=silent, 1=minimal, 2=readable summary, 3=full (no truncate), 4=verbose (raw details) bottleneck: defaultBottleneckConfig, retry: { enabled: false, retries: 2, baseDelayMs: 500, maxDelayMs: 5000, retryableStatusCodes: [...DEFAULT_RETRYABLE_STATUS_CODES] }, roundRobin: false, // false=fallback mode, true=round robin rotation ...config } const freeMix = { openrouter: true, cerebras: true, groq: true, together: false, lambda: false }; this.mix = { ...freeMix, ...mix }; this.limiter = new Bottleneck(this.config.bottleneck); } replace(keyValues) { this.config.replace = { ...this.config.replace, ...keyValues }; return this; } static new({ options = {}, config = {}, mix = {} } = {}) { return new ModelMix({ options, config, mix }); } new({ options = {}, config = {}, mix = {} } = {}) { const instance = new ModelMix({ options: { ...this.options, ...options }, config: { ...this.config, ...config }, mix: { ...this.mix, ...mix } }); instance.models = this.models; // Share models array for round-robin rotation return instance; } static formatJSON(obj) { return inspect(obj, { depth: null, colors: true, maxArrayLength: null, breakLength: 80, compact: false }); } static formatMessage(message) { if (typeof message !== 'string') return message; try { return ModelMix.formatJSON(JSON.parse(message.trim())); } catch (e) { return message; } } // debug logging helpers static truncate(str, maxLen = 1000) { if (!str || typeof str !== 'string') return str; return str.length > maxLen ? str.substring(0, maxLen) + '...' : str; } static calculateCost(modelKey, tokens) { const pricing = MODEL_PRICING[modelKey]; if (!pricing) return null; const [inputPerMillion, outputPerMillion] = pricing; return (tokens.input * inputPerMillion / 1_000_000) + (tokens.output * outputPerMillion / 1_000_000); } static extractCacheTokens(usage = {}) { return usage.input_tokens_details?.cached_tokens || usage.prompt_tokens_details?.cached_tokens || usage.cache_read_input_tokens || usage.cachedContentTokenCount || usage.cached_content_token_count || 0; } static formatInputSummary(messages, system, debug = 2) { const lastMessage = messages[messages.length - 1]; let inputText = ''; if (lastMessage && Array.isArray(lastMessage.content)) { const textContent = lastMessage.content.find(c => c.type === 'text'); if (textContent) inputText = textContent.text; } else if (lastMessage && typeof lastMessage.content === 'string') { inputText = lastMessage.content; } const noTruncate = debug >= 3; const systemStr = noTruncate ? (system || '') : ModelMix.truncate(system, 500); const inputStr = noTruncate ? inputText : ModelMix.truncate(inputText, 1200); const msgCount = `(${messages.length} msg${messages.length !== 1 ? 's' : ''})`; return `| SYSTEM\n${systemStr}\n| INPUT ${msgCount}\n${inputStr}`; } static formatOutputSummary(result, debug) { const parts = []; const noTruncate = debug >= 3; if (result.message) { // Try to parse as JSON for better formatting try { const parsed = JSON.parse(result.message.trim()); // If it's valid JSON and debug >= 2, show it formatted if (debug >= 2) { parts.push(`| OUTPUT (JSON)\n${ModelMix.formatJSON(parsed)}`); } else { parts.push(`| OUTPUT\n${ModelMix.truncate(result.message, 1500)}`); } } catch (e) { parts.push(`| OUTPUT\n${noTruncate ? result.message : ModelMix.truncate(result.message, 1500)}`); } } if (result.think) { parts.push(`| THINK\n${noTruncate ? result.think : ModelMix.truncate(result.think, 800)}`); } if (result.toolCalls && result.toolCalls.length > 0) { const toolNames = result.toolCalls.map(t => t.function?.name || t.name).join(', '); parts.push(`| TOOLS\n${toolNames}`); } return parts.join('\n'); } attach(key, provider) { if (this.models.some(model => model.key === key)) { return this; } if (this.messages.length > 0) { throw new Error("Cannot add models after message generation has started."); } this.models.push({ key, provider }); return this; } gpt41({ options = {}, config = {} } = {}) { return this.attach('gpt-4.1', new MixOpenAI({ options, config })); } gpt41mini({ options = {}, config = {} } = {}) { return this.attach('gpt-4.1-mini', new MixOpenAI({ options, config })); } gpt41nano({ options = {}, config = {} } = {}) { return this.attach('gpt-4.1-nano', new MixOpenAI({ options, config })); } gpt5({ options = {}, config = {} } = {}) { return this.attach('gpt-5', new MixOpenAI({ options, config })); } gpt5mini({ options = {}, config = {} } = {}) { return this.attach('gpt-5-mini', new MixOpenAI({ options, config })); } gpt5nano({ options = {}, config = {} } = {}) { return this.attach('gpt-5-nano', new MixOpenAI({ options, config })); } gpt51({ options = {}, config = {} } = {}) { return this.attach('gpt-5.1', new MixOpenAIResponses({ options, config })); } gpt52({ options = {}, config = {} } = {}) { return this.attach('gpt-5.2', new MixOpenAIResponses({ options, config })); } gpt54({ options = {}, config = {} } = {}) { return this.attach('gpt-5.4', new MixOpenAIResponses({ options, config })); } gpt54mini({ options = {}, config = {} } = {}) { return this.attach('gpt-5.4-mini', new MixOpenAIResponses({ options, config })); } gpt54nano({ options = {}, config = {} } = {}) { return this.attach('gpt-5.4-nano', new MixOpenAIResponses({ options, config })); } gpt54pro({ options = {}, config = {} } = {}) { return this.attach('gpt-5.4-pro', new MixOpenAIResponses({ options, config })); } gpt55({ options = {}, config = {} } = {}) { return this.attach('gpt-5.5', new MixOpenAIResponses({ options, config })); } gpt55pro({ options = {}, config = {} } = {}) { return this.attach('gpt-5.5-pro', new MixOpenAIResponses({ options, config })); } gptRealtime({ options = {}, config = {} } = {}) { return this.attach('gpt-realtime', new MixOpenAIWebSocket({ options, config })); } gptRealtimeMini({ options = {}, config = {} } = {}) { return this.attach('gpt-realtime-mini', new MixOpenAIWebSocket({ options, config })); } gpt53codex({ options = {}, config = {} } = {}) { return this.attach('gpt-5.3-codex', new MixOpenAIResponses({ options, config })); } gpt53chat({ options = {}, config = {} } = {}) { return this.attach('gpt-5.3-chat-latest', new MixOpenAIResponses({ options, config })); } gptOss({ options = {}, config = {}, mix = {} } = {}) { mix = { ...this.mix, ...mix }; if (mix.together) this.attach('openai/gpt-oss-120b', new MixTogether({ options, config })); if (mix.cerebras) this.attach('gpt-oss-120b', new MixCerebras({ options, config })); if (mix.groq) this.attach('openai/gpt-oss-120b', new MixGroq({ options, config })); if (mix.openrouter) this.attach('openai/gpt-oss-120b:free', new MixOpenRouter({ options, config })); return this; } opus47think({ options = {}, config = {} } = {}) { options = { ...MixAnthropic.thinkingOptions, ...options }; return this.attach('claude-opus-4-7', new MixAnthropic({ options, config })); } opus46think({ options = {}, config = {} } = {}) { options = { ...MixAnthropic.thinkingOptions, ...options }; return this.attach('claude-opus-4-6', new MixAnthropic({ options, config })); } opus45think({ options = {}, config = {} } = {}) { options = { ...MixAnthropic.thinkingOptions, ...options }; return this.attach('claude-opus-4-5-20251101', new MixAnthropic({ options, config })); } opus47({ options = {}, config = {} } = {}) { return this.attach('claude-opus-4-7', new MixAnthropic({ options, config })); } opus46({ options = {}, config = {} } = {}) { return this.attach('claude-opus-4-6', new MixAnthropic({ options, config })); } opus45({ options = {}, config = {} } = {}) { return this.attach('claude-opus-4-5-20251101', new MixAnthropic({ options, config })); } opus41({ options = {}, config = {} } = {}) { return this.attach('claude-opus-4-1-20250805', new MixAnthropic({ options, config })); } opus41think({ options = {}, config = {} } = {}) { options = { ...MixAnthropic.thinkingOptions, ...options }; return this.attach('claude-opus-4-1-20250805', new MixAnthropic({ options, config })); } sonnet4({ options = {}, config = {} } = {}) { return this.attach('claude-sonnet-4-20250514', new MixAnthropic({ options, config })); } sonnet4think({ options = {}, config = {} } = {}) { options = { ...MixAnthropic.thinkingOptions, ...options }; return this.attach('claude-sonnet-4-20250514', new MixAnthropic({ options, config })); } sonnet46({ options = {}, config = {} } = {}) { return this.attach('claude-sonnet-4-6', new MixAnthropic({ options, config })); } sonnet46think({ options = {}, config = {} } = {}) { options = { ...MixAnthropic.thinkingOptions, ...options }; return this.attach('claude-sonnet-4-6', new MixAnthropic({ options, config })); } sonnet45({ options = {}, config = {} } = {}) { return this.attach('claude-sonnet-4-5-20250929', new MixAnthropic({ options, config })); } sonnet45think({ options = {}, config = {} } = {}) { options = { ...MixAnthropic.thinkingOptions, ...options }; return this.attach('claude-sonnet-4-5-20250929', new MixAnthropic({ options, config })); } haiku35({ options = {}, config = {} } = {}) { return this.attach('claude-3-5-haiku-20241022', new MixAnthropic({ options, config })); } haiku45({ options = {}, config = {} } = {}) { return this.attach('claude-haiku-4-5-20251001', new MixAnthropic({ options, config })); } haiku45think({ options = {}, config = {} } = {}) { options = { ...MixAnthropic.thinkingOptions, ...options }; return this.attach('claude-haiku-4-5-20251001', new MixAnthropic({ options, config })); } gemini25flash({ options = {}, config = {} } = {}) { return this.attach('gemini-2.5-flash', new MixGoogle({ options, config })); } gemini31pro({ options = {}, config = {} } = {}) { return this.attach('gemini-3.1-pro-preview', new MixGoogle({ options, config })); } gemini3pro({ options = {}, config = {} } = {}) { return this.attach('gemini-3-pro-preview', new MixGoogle({ options, config })); } gemini3flash({ options = {}, config = {} } = {}) { return this.attach('gemini-3-flash-preview', new MixGoogle({ options, config })); } gemini35flash({ options = {}, config = {} } = {}) { return this.attach('gemini-3.5-flash', new MixGoogle({ options, config })); } gemini31flashLite({ options = {}, config = {} } = {}) { return this.attach('gemini-3.1-flash-lite-preview', new MixGoogle({ options, config })); } gemini25pro({ options = {}, config = {} } = {}) { return this.attach('gemini-2.5-pro', new MixGoogle({ options, config })); } sonarPro({ options = {}, config = {} } = {}) { return this.attach('sonar-pro', new MixPerplexity({ options, config })); } sonar({ options = {}, config = {} } = {}) { return this.attach('sonar', new MixPerplexity({ options, config })); } grok43({ options = {}, config = {} } = {}) { return this.attach('grok-4.3', new MixGrok({ options, config })); } grok420multiAgent({ options = {}, config = {} } = {}) { return this.attach('grok-4.20-multi-agent-0309', new MixGrok({ options, config })); } grok420think({ options = {}, config = {} } = {}) { return this.attach('grok-4.20-0309-reasoning', new MixGrok({ options, config })); } grok420({ options = {}, config = {} } = {}) { return this.attach('grok-4.20-0309-non-reasoning', new MixGrok({ options, config })); } grok41think({ options = {}, config = {} } = {}) { return this.attach('grok-4-1-fast-reasoning', new MixGrok({ options, config })); } grok41({ options = {}, config = {} } = {}) { return this.attach('grok-4-1-fast-non-reasoning', new MixGrok({ options, config })); } qwen3({ options = {}, config = {}, mix = { together: true, cerebras: false } } = {}) { if (mix.together) this.attach('Qwen/Qwen3-235B-A22B-fp8-tput', new MixTogether({ options, config })); if (mix.cerebras) this.attach('qwen-3-32b', new MixCerebras({ options, config })); return this; } qwen36plus({ options = {}, config = {}, mix = { fireworks: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.fireworks) this.attach('accounts/fireworks/models/qwen3p6-plus', new MixFireworks({ options, config })); if (mix.together) this.attach('Qwen/Qwen3.6-Plus', new MixTogether({ options, config })); return this; } scout({ options = {}, config = {}, mix = {} } = {}) { mix = { ...this.mix, ...mix }; if (mix.groq) this.attach('meta-llama/llama-4-scout-17b-16e-instruct', new MixGroq({ options, config })); if (mix.together) this.attach('meta-llama/Llama-4-Scout-17B-16E-Instruct', new MixTogether({ options, config })); if (mix.cerebras) this.attach('llama-4-scout-17b-16e-instruct', new MixCerebras({ options, config })); return this; } maverick({ options = {}, config = {}, mix = {} } = {}) { mix = { ...this.mix, ...mix }; if (mix.together) this.attach('meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8', new MixTogether({ options, config })); if (mix.lambda) this.attach('llama-4-maverick-17b-128e-instruct-fp8', new MixLambda({ options, config })); return this; } deepseekR1({ options = {}, config = {}, mix = {} } = {}) { mix = { ...this.mix, ...mix }; if (mix.groq) this.attach('deepseek-r1-distill-llama-70b', new MixGroq({ options, config })); if (mix.together) this.attach('deepseek-ai/DeepSeek-R1', new MixTogether({ options, config })); if (mix.cerebras) this.attach('deepseek-r1-distill-llama-70b', new MixCerebras({ options, config })); if (mix.openrouter) this.attach('deepseek/deepseek-r1-0528:free', new MixOpenRouter({ options, config })); return this; } hermes3({ options = {}, config = {}, mix = {} } = {}) { mix = { ...this.mix, ...mix }; if (mix.lambda) this.attach('Hermes-3-Llama-3.1-405B-FP8', new MixLambda({ options, config })); if (mix.openrouter) this.attach('nousresearch/hermes-3-llama-3.1-405b:free', new MixOpenRouter({ options, config })); return this; } kimiK26think({ options = {}, config = {}, mix = { fireworks: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.fireworks) this.attach('accounts/fireworks/models/kimi-k2p6', new MixFireworks({ options, config })); if (mix.openrouter) this.attach('moonshotai/kimi-k2.6', new MixOpenRouter({ options, config })); if (mix.together) this.attach('moonshotai/Kimi-K2.6', new MixTogether({ options, config })); return this; } kimiK25think({ options = {}, config = {}, mix = { together: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.together) this.attach('moonshotai/Kimi-K2.5', new MixTogether({ options, config })); if (mix.fireworks) this.attach('accounts/fireworks/models/kimi-k2p5', new MixFireworks({ options, config })); if (mix.openrouter) this.attach('moonshotai/kimi-k2.5', new MixOpenRouter({ options, config })); return this; } lmstudio(model = 'lmstudio', { options = {}, config = {} } = {}) { return this.attach(model, new MixLMStudio({ options, config })); } minimaxM25({ options = {}, config = {}, mix = { minimax: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.minimax) this.attach('MiniMax-M2.5', new MixMiniMax({ options, config })); if (mix.fireworks) this.attach('fireworks/minimax-m2p5', new MixFireworks({ options, config })); return this; } minimaxM27({ options = {}, config = {}, mix = { openrouter: true, minimax: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.nvidia) this.attach('minimaxai/minimax-m2.7', new MixNVIDIA({ options, config })); if (mix.openrouter) return this.attach('minimax/minimax-m2.7', new MixOpenRouter({ options, config })); if (mix.minimax) return this.attach('MiniMax-M2.7', new MixMiniMax({ options, config })); if (mix.together) return this.attach('MiniMaxAI/MiniMax-M2.7', new MixTogether({ options, config })); return this; } mimo25({ options = {}, config = {}, mix = { openrouter: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.openrouter) this.attach('xiaomi/mimo-v2.5', new MixOpenRouter({ options, config })); if (mix.mimo) this.attach('mimo-v2.5', new MixMiMo({ options, config })); return this; } mimo25pro({ options = {}, config = {}, mix = { openrouter: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.openrouter) this.attach('xiaomi/mimo-v2.5-pro', new MixOpenRouter({ options, config })); if (mix.mimo) this.attach('mimo-v2.5-pro', new MixMiMo({ options, config })); return this; } deepseekV4Pro({ options = {}, config = {}, mix = { fireworks: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.nvidia) this.attach('deepseek-ai/deepseek-v4-pro', new MixNVIDIA({ options, config })); if (mix.fireworks) this.attach('accounts/fireworks/models/deepseek-v4-pro', new MixFireworks({ options, config })); if (mix.openrouter) this.attach('deepseek/deepseek-v4-pro', new MixOpenRouter({ options, config })); if (mix.together) this.attach('deepseek-ai/DeepSeek-V4-Pro', new MixTogether({ options, config })); return this; } deepseekV4Flash({ options = {}, config = {}, mix = { nvidia: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.nvidia) this.attach('deepseek-ai/deepseek-v4-flash', new MixNVIDIA({ options, config })); return this; } GLM51({ options = {}, config = {}, mix = { fireworks: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.nvidia) this.attach('z-ai/glm-5.1', new MixNVIDIA({ options, config })); if (mix.fireworks) this.attach('accounts/fireworks/models/glm-5p1', new MixFireworks({ options, config })); if (mix.openrouter) this.attach('z-ai/glm-5.1', new MixOpenRouter({ options, config })); if (mix.together) this.attach('zai-org/GLM-5.1', new MixTogether({ options, config })); return this; } GLM5({ options = {}, config = {}, mix = { fireworks: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.fireworks) this.attach('fireworks/glm-5', new MixFireworks({ options, config })); return this; } GLM47({ options = {}, config = {}, mix = { fireworks: true } } = {}) { mix = { ...this.mix, ...mix }; if (mix.fireworks) this.attach('accounts/fireworks/models/glm-4p7', new MixFireworks({ options, config })); if (mix.openrouter) this.attach('z-ai/glm-4.7', new MixOpenRouter({ options, config })); if (mix.cerebras) this.attach('zai-glm-4.7', new MixCerebras({ options, config })); return this; } addText(text, { role = "user" } = {}) { const content = [{ type: "text", text }]; this.messages.push({ role, content }); return this; } addTextFromFile(filePath, { role = "user" } = {}) { const content = this.readFile(filePath); this.addText(content, { role }); return this; } setSystem(text) { this.config.system = text; return this; } setSystemFromFile(filePath) { const content = this.readFile(filePath); this.setSystem(content); return this; } addImageFromBuffer(buffer, { role = "user" } = {}) { this.messages.push({ role, content: [{ type: "image", source: { type: "buffer", data: buffer } }] }); return this; } addImage(filePath, { role = "user" } = {}) { const absolutePath = path.resolve(filePath); if (!fs.existsSync(absolutePath)) { throw new Error(`Image file not found: ${filePath}`); } this.messages.push({ role, content: [{ type: "image", source: { type: "file", data: filePath } }] }); return this; } addImageFromUrl(url, { role = "user" } = {}) { let source; if (url.startsWith('data:')) { // Parse data URL: data:image/jpeg;base64,/9j/4AAQ... const match = url.match(/^data:([^;]+);base64,(.+)$/); if (match) { source = { type: "base64", media_type: match[1], data: match[2] }; } else { throw new Error('Invalid data URL format'); } } else { source = { type: "url", data: url }; } this.messages.push({ role, content: [{ type: "image", source }] }); return this; } async processImages() { for (let i = 0; i < this.messages.length; i++) { const message = this.messages[i]; if (!Array.isArray(message.content)) continue; for (let j = 0; j < message.content.length; j++) { const content = message.content[j]; if (content.type !== 'image' || content.source.type === 'base64') continue; try { let buffer, mimeType; switch (content.source.type) { case 'url': const response = await fetchBinaryResponse(content.source.data); buffer = response.data; mimeType = response.headers['content-type']; break; case 'file': buffer = this.readFile(content.source.data, { encoding: null }); break; case 'buffer': buffer = content.source.data; break; } // Detect mimeType if not provided if (!mimeType) { if (typeof detectFileTypeFromBuffer !== 'function') { throw new Error('file-type module does not expose a buffer detector'); } const detectedType = await detectFileTypeFromBuffer(buffer); if (!detectedType || !detectedType.mime.startsWith('image/')) { throw new Error(`Invalid image - unable to detect valid image format`); } mimeType = detectedType.mime; } // Update the content with processed image message.content[j] = { type: "image", source: { type: "base64", media_type: mimeType, data: buffer.toString('base64') } }; } catch (error) { console.error(`Error processing image:`, error); // Remove failed image from content message.content.splice(j, 1); j--; } } } } async message() { let raw = await this.execute({ options: { stream: false } }); return raw.message; } async json(schemaExample = null, schemaDescription = {}, { type = 'json_object', addExample = false, addSchema = true, addNote = false } = {}) { let isArrayWrap = false; if (Array.isArray(schemaExample)) { isArrayWrap = true; schemaExample = { out: schemaExample }; if (Array.isArray(schemaDescription)) { schemaDescription = { out: schemaDescription }; } } let options = { response_format: { type }, stream: false, } // Apply template replacements to system before adding extra instructions let systemWithReplacements = this._template(this.config.system, this.config.replace); let config = { system: systemWithReplacements, } if (schemaExample) { config.schema = generateJsonSchema(schemaExample, schemaDescription); if (addSchema) { config.system += "\n\nOutput JSON Schema: \n```\n" + JSON.stringify(config.schema) + "\n```"; } if (addExample) { config.system += "\n\nOutput JSON Example: \n```\n" + JSON.stringify(schemaExample) + "\n```"; } if (addNote) { config.system += "\n\nOutput JSON Escape: double quotes, backslashes, and control characters inside JSON strings.\nEnsure the output contains no comments."; } } const { message } = await this.execute({ options, config }); const parsed = JSON.parse(this._extractBlock(message)); return isArrayWrap ? parsed.out : parsed; } _extractBlock(response) { const block = response.match(/```(?:\w+)?\s*([\s\S]*?)```/); return block ? block[1].trim() : response.trim(); } async block({ addSystemExtra = true } = {}) { // Apply template replacements to system before adding extra instructions let systemWithReplacements = this._template(this.config.system, this.config.replace); let config = { system: systemWithReplacements, } if (addSystemExtra) { config.system += "\nReturn the result of the task between triple backtick block code tags ```"; } const { message } = await this.execute({ options: { stream: false }, config }); return this._extractBlock(message); } async raw() { return this.execute({ options: { stream: false } }); } async stream(callback) { this.streamCallback = callback; return this.execute({ options: { stream: true } }); } replaceKeyFromFile(key, filePath) { try { const content = this.readFile(filePath); this.replace({ [key]: this._template(content, this.config.replace) }); } catch (error) { // Gracefully handle file read errors without throwing log.warn(`replaceKeyFromFile: ${error.message}`); } return this; } _template(input, replace) { if (!replace) return input; for (const k in replace) { input = input.split(/([¿?¡!,"';:\(\)\.\s])/).map(x => x === k ? replace[k] : x).join(""); } return input; } groupByRoles(messages) { return messages.reduce((acc, currentMessage, index) => { // Don't group tool messages or assistant messages with tool_calls // Each tool response must be separate with its own tool_call_id const shouldNotGroup = currentMessage.role === 'tool' || currentMessage.tool_calls || currentMessage.tool_call_id; if (index === 0 || currentMessage.role !== messages[index - 1].role || shouldNotGroup) { // acc.push({ // role: currentMessage.role, // content: currentMessage.content // }); acc.push(currentMessage); } else { acc[acc.length - 1].content = acc[acc.length - 1].content.concat(currentMessage.content); } return acc; }, []); } applyTemplate() { if (!this.config.replace) return; this.config.system = this._template(this.config.system, this.config.replace); this.messages = this.messages.map(message => { if (message.content instanceof Array) { message.content = message.content.map(content => { if (content.type === 'text') { content.text = this._template(content.text, this.config.replace); } return content; }); } return message; }); } async prepareMessages() { await this.processImages(); this.applyTemplate(); // Smart message slicing based on max_history: // 0 = no history (stateless), N = keep last N messages, -1 = unlimited if (this.config.max_history > 0) { let sliceStart = Math.max(0, this.messages.length - this.config.max_history); // If we're slicing into the middle of a tool interaction, // backtrack to include the full sequence (user → assistant/tool_calls → tool results) while (sliceStart > 0 && sliceStart < this.messages.length) { const msg = this.messages[sliceStart]; if (msg.role === 'tool' || (msg.role === 'assistant' && msg.tool_calls)) { sliceStart--; } else { break; } } this.messages = this.messages.slice(sliceStart); } // max_history = -1: unlimited, no slicing // max_history = 0: no history, messages only contain what was added since last call this.messages = this.groupByRoles(this.messages); this.options.messages = this.messages; } readFile(filePath, { encoding = 'utf8' } = {}) { try { const absolutePath = path.resolve(filePath); return fs.readFileSync(absolutePath, { encoding }); } catch (error) { if (error.code === 'ENOENT') { throw new Error(`File not found: ${filePath}`); } else if (error.code === 'EACCES') { throw new Error(`Permission denied: ${filePath}`); } else { throw new Error(`Error reading file ${filePath}: ${error.message}`); } } } async execute({ config = {}, options = {} } = {}) { if (!this.models || this.models.length === 0) { throw new Error("No models specified. Use methods like .gpt5(), .sonnet4() first."); } return this.limiter.schedule(async () => { await this.prepareMessages(); if (this.messages.length === 0) { throw new Error("No user messages have been added. Use addText(prompt), addTextFromFile(filePath), addImage(filePath), or addImageFromUrl(url) to add a prompt."); } // Merge config to get final roundRobin value and retry settings const finalConfig = { ...this.config, ...config, retry: { ...(this.config.retry || {}), ...(config.retry || {}) } }; // Try all models in order (first is primary, rest are fallbacks) const modelsToTry = this.models.map((model, index) => ({ model, index })); // Round robin: rotate models array AFTER using current for next request if (finalConfig.roundRobin && this.models.length > 1) { const firstModel = this.models.shift(); this.models.push(firstModel); } let lastError = null; for (let i = 0; i < modelsToTry.length; i++) { const { model: currentModel, index: originalIndex } = modelsToTry[i]; const currentModelKey = currentModel.key; const providerInstance = currentModel.provider; const optionsTools = providerInstance.getOptionsTools(this.tools); // Create clean copies for each provider to avoid contamination const currentOptions = { ...this.options, ...providerInstance.options, ...optionsTools, ...options, model: currentModelKey }; const currentConfig = { ...finalConfig, ...providerInstance.config, ...config, retry: { ...(finalConfig.retry || {}), ...(providerInstance.config?.retry || {}), ...(config.retry || {}) } }; if (currentConfig.debug >= 1) { const isPrimary = i === 0; const prefix = isPrimary ? '→' : '↻'; const suffix = isPrimary ? (currentConfig.roundRobin ? ` (round-robin #${originalIndex + 1})` : '') : ' (fallback)'; // Extract provider name from class name (e.g., "MixOpenRouter" -> "openrouter") const providerName = providerInstance.constructor.name.replace(/^Mix/, '').toLowerCase(); const header = `\n${prefix} [${providerName}:${currentModelKey}] #${originalIndex + 1}${suffix}`; if (currentConfig.debug >= 2) { console.log(`${header}\n${ModelMix.formatInputSummary(this.messages, currentConfig.system, currentConfig.debug)}`); } else { console.log(header); } } try { if (currentOptions.stream && this.streamCallback) { providerInstance.streamCallback = this.streamCallback; } const retryConfig = currentConfig.retry || {}; const retries = retryConfig.enabled ? Math.max(0, retryConfig.retries || 0) : 0; const baseDelayMs = Math.max(0, retryConfig.baseDelayMs || 0); const maxDelayMs = Math.max(baseDelayMs, retryConfig.maxDelayMs || baseDelayMs); const retryableStatusCodes = new Set( Array.isArray(retryConfig.retryableStatusCodes) && retryConfig.retryableStatusCodes.length > 0 ? retryConfig.retryableStatusCodes : DEFAULT_RETRYABLE_STATUS_CODES ); let attempt = 0; let result; let startTime = 0; while (true) { try { startTime = Date.now(); result = await providerInstance.create({ options: currentOptions, config: currentConfig }); break; } catch (attemptError) { const statusCode = getErrorStatusCode(attemptError); const isRetryable = retryableStatusCodes.has(statusCode); const canRetry = attempt < retries && isRetryable; if (!canRetry) { throw attemptError; } if (currentConfig.debug >= 1) { const nextAttempt = attempt + 2; const totalAttempts = retries + 1; console.log(`↺ Retrying [${currentModelKey}] due to status ${statusCode} (${nextAttempt}/${totalAttempts})`); } const delay = Math.min(baseDelayMs * Math.pow(2, attempt), maxDelayMs); await sleep(delay); attempt += 1; } } const elapsedMs = Date.now() - startTime; if (result.tokens) { result.tokens.cost = ModelMix.calculateCost(currentModelKey, result.tokens); const elapsedSec = elapsedMs / 1000; result.tokens.speed = elapsedSec > 0 ? Math.round(result.tokens.output / elapsedSec) : 0; } if (result.toolCalls && result.toolCalls.length > 0) { if (result.message) { if (result.signature) { this.messages.push({ role: "assistant", content: [{ type: "thinking", thinking: result.think, signature: result.signature }] }); } else { this.addText(result.message, { role: "assistant" }); } } this.messages.push({ role: "assistant", content: null, tool_calls: result.toolCalls }); const toolResults = await this.processToolCalls(result.toolCalls); for (const toolResult of toolResults) { this.messages.push({ role: 'tool', tool_call_id: toolResult.tool_call_id, name: toolResult.name, content: toolResult.content }); } return this.execute({ options, config }); } // debug level 1: Just success indicator if (currentConfig.debug === 1) { console.log(`✓ Success`); } // debug level 2: Readable summary of output if (currentConfig.debug >= 2) { const tokenInfo = result.tokens ? ` ${result.tokens.input} → ${result.tokens.output} tok` + (result.tokens.cached ? ` (cached:${result.tokens.cached})` : '') + (result.tokens.speed ? ` | ${result.tokens.speed} t/s` : '') + (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '') : ''; console.log(`✓${tokenInfo}\n${ModelMix.formatOutputSummary(result, currentConfig.debug).trim()}`); } // debug level 4 (verbose): Full response details if (currentConfig.debug >= 4) { if (result.response) { console.log('\n[RAW RESPONSE]'); console.log(ModelMix.formatJSON(result.response)); } if (result.message) { console.log('\n[FULL MESSAGE]'); console.log(ModelMix.formatMessage(result.message)); } if (result.think) { console.log('\n[FULL THINKING]'); console.log(result.think); } } if (currentConfig.debug >= 1) console.log(''); this.lastRaw = result; // Manage conversation history based on max_history setting if (this.config.max_history === 0) { // Stateless: clear messages so next call starts fresh this.messages = []; } else if (result.message) { // Persist assistant response for multi-turn conversations if (result.signature) { this.messages.push({ role: "assistant", content: [{ type: "thinking", thinking: result.think, signature: result.signature }, { type: "text", text: result.message }] }); } else { this.addText(result.message, { role: "assistant" }); } } return result; } catch (error) { lastError = error; log.warn(`Model ${currentModelKey} failed (Attempt #${i + 1}/${modelsToTry.length}).`); if (error.message) log.warn(`Error: ${error.message}`); if (error.statusCode) log.warn(`Status Code: ${error.statusCode}`); if (error.details) log.warn(`Details:\n${ModelMix.formatJSON(error.details)}`); if (i === modelsToTry.length - 1) { console.error(`All ${modelsToTry.length} model(s) failed. Throwing last error from ${currentModelKey}.`); throw lastError; } else { const nextModelKey = modelsToTry[i + 1].model.key; log.info(`-> Proceeding to next model: ${nextModelKey}`); } } } log.error("Fallback logic completed without success or throwing the final error."); throw lastError || new Error("Failed to get response from any model, and no specific error was caught."); }); } async processToolCalls(toolCalls) { const result = [] for (const toolCall of toolCalls) { // Handle different tool call formats more robustly let toolName, toolArgs, toolId; try { if (toolCall.function) { // Formato OpenAI/normalizado toolName = toolCall.function.name; toolArgs = typeof toolCall.function.arguments === 'string' ? JSON.parse(toolCall.function.arguments) : toolCall.function.arguments; toolId = toolCall.id; } else if (toolCall.name) {