/*
 * modelmix
 * Version:
 * 🧬 Reliable interface with automatic fallback for AI LLMs.
 * 1,181 lines (1,041 loc) • 108 kB
 * JavaScript
 */
const fs = require('fs');
const fileType = require('file-type');
const detectFileTypeFromBuffer = fileType.fileTypeFromBuffer || fileType.fromBuffer;
const { inspect } = require('util');
const log = require('lemonlog')('ModelMix');
const Bottleneck = require('bottleneck');
const path = require('path');
const WebSocket = require('ws');
const generateJsonSchema = require('./schema');
const { Client } = require("@modelcontextprotocol/sdk/client/index.js");
const { StdioClientTransport } = require("@modelcontextprotocol/sdk/client/stdio.js");
const { MCPToolsManager } = require('./mcp-tools');
const {
stripContentTypeHeader,
createMultipartFormData,
buildRequestBodyAndHeaders
} = require('./multipart');
const {
fetchJsonResponse,
fetchBinaryResponse,
fetchStreamResponse
} = require('./http-client');
// HTTP status codes treated as retryable by default. The constructor copies this
// list into `config.retry.retryableStatusCodes`, and `execute` falls back to it
// when the configured list is missing or empty.
const DEFAULT_RETRYABLE_STATUS_CODES = [408, 425, 429, 500, 502, 503, 504, 529];
/**
 * Pulls an HTTP-style status code out of an error object.
 *
 * Looks at `error.statusCode`, then `error.response.status`, then
 * `error.response.statusCode`, returning the first one that is neither
 * `null` nor `undefined` (so `0` is a valid result).
 *
 * @param {*} error - Any thrown value; may be null/undefined.
 * @returns {*} The first status found, or `null` when none is present.
 */
function getErrorStatusCode(error) {
    const direct = error?.statusCode;
    if (direct != null) return direct;

    const responseStatus = error?.response?.status;
    if (responseStatus != null) return responseStatus;

    return error?.response?.statusCode ?? null;
}
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
// Pricing per 1M tokens: [input, output] in USD
// Based on provider pricing pages linked in README
//
// Keys are the exact model identifiers passed to `attach(...)`; the same model
// served by several providers therefore appears under several keys.
// `ModelMix.calculateCost` looks a model up here and returns null when the key
// is not listed.
// NOTE(review): some keys attached elsewhere in this file (e.g.
// 'gpt-5.3-chat-latest', 'gemini-3.5-flash') have no entry here — confirm
// whether that is intentional.
const MODEL_PRICING = {
// OpenAI
'gpt-realtime-mini': [0.60, 2.40],
'gpt-realtime': [4.00, 16.00],
'gpt-5.5-pro': [30.00, 180.00],
'gpt-5.5': [5.00, 30.00],
'gpt-5.4': [2.50, 15.00],
'gpt-5.4-pro': [30, 180.00],
'gpt-5.4-mini': [0.75, 4.50],
'gpt-5.4-nano': [0.20, 1.25],
'gpt-5.3-codex': [1.75, 14.00],
'gpt-5.2': [1.75, 14.00],
'gpt-5.2-chat-latest': [1.75, 14.00],
'gpt-5.1': [1.25, 10.00],
'gpt-5': [1.25, 10.00],
'gpt-5-mini': [0.25, 2.00],
'gpt-5-nano': [0.05, 0.40],
'gpt-4.1': [2.00, 8.00],
'gpt-4.1-mini': [0.40, 1.60],
'gpt-4.1-nano': [0.10, 0.40],
// gptOss (Together/Groq/Cerebras/OpenRouter)
'openai/gpt-oss-120b': [0.15, 0.60],
'gpt-oss-120b': [0.15, 0.60],
'openai/gpt-oss-120b:free': [0, 0],
// Anthropic
'claude-opus-4-7': [5.00, 25.00],
'claude-opus-4-6': [5.00, 25.00],
'claude-opus-4-5-20251101': [5.00, 25.00],
'claude-opus-4-1-20250805': [15.00, 75.00],
'claude-sonnet-4-6': [3.00, 15.00],
'claude-sonnet-4-5-20250929': [3.00, 15.00],
'claude-sonnet-4-20250514': [3.00, 15.00],
'claude-3-5-haiku-20241022': [0.80, 4.00],
'claude-haiku-4-5-20251001': [1.00, 5.00],
// Google
'gemini-3.1-pro-preview':[2.00, 12.00],
'gemini-3-pro-preview': [2.00, 12.00],
'gemini-3-flash-preview': [0.50, 3.00],
'gemini-2.5-pro': [1.25, 10.00],
'gemini-2.5-flash': [0.30, 2.50],
'gemini-3.1-flash-lite-preview': [0.25, 1.50],
// Grok
'grok-4.3': [1.25, 2.50],
'grok-4.20-multi-agent-0309': [1.25, 2.50],
'grok-4.20-0309-reasoning': [1.25, 2.50],
'grok-4.20-0309-non-reasoning': [1.25, 2.50],
'grok-4-1-fast-reasoning': [0.20, 0.50],
'grok-4-1-fast-non-reasoning': [0.20, 0.50],
// Fireworks
'accounts/fireworks/models/deepseek-v3p2': [0.56, 1.68],
'accounts/fireworks/models/deepseek-v4-pro': [1.74, 3.48],
'deepseek-ai/DeepSeek-V4-Pro': [2.10, 4.40],
'accounts/fireworks/models/glm-4p7': [0.55, 2.19],
'accounts/fireworks/models/glm-5p1': [1.05, 3.50],
'accounts/fireworks/models/kimi-k2p5': [0.50, 2.80],
'accounts/fireworks/models/qwen3p6-plus': [0.50, 3.00],
'Qwen/Qwen3.6-Plus': [0.50, 3.00],
'fireworks/glm-5': [1.00, 3.20],
// MiniMax
'MiniMax-M2.5': [0.30, 1.20],
'MiniMax-M2.7': [0.30, 1.20],
'fireworks/minimax-m2p5': [0.30, 1.20],
'minimax/minimax-m2.7': [0.30, 1.20],
// Perplexity
'sonar': [1.00, 1.00],
'sonar-pro': [3.00, 15.00],
// Scout (Groq/Together/Cerebras)
'meta-llama/llama-4-scout-17b-16e-instruct': [0.11, 0.34],
'meta-llama/Llama-4-Scout-17B-16E-Instruct': [0.11, 0.34],
'llama-4-scout-17b-16e-instruct': [0.11, 0.34],
// Maverick (Groq/Together/Lambda)
'meta-llama/llama-4-maverick-17b-128e-instruct': [0.20, 0.60],
'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8': [0.20, 0.60],
'llama-4-maverick-17b-128e-instruct-fp8': [0.20, 0.60],
// Hermes3 (Lambda/OpenRouter)
'Hermes-3-Llama-3.1-405B-FP8': [0.80, 0.80],
'nousresearch/hermes-3-llama-3.1-405b:free': [0, 0],
// Qwen3 (Together/Cerebras)
'Qwen/Qwen3-235B-A22B-fp8-tput': [0.20, 0.60],
'qwen-3-32b': [0.20, 0.60],
// Kimi K2.5 (Together/Fireworks/OpenRouter)
'moonshotai/Kimi-K2.5': [0.50, 2.80],
'moonshotai/kimi-k2.5': [0.50, 2.80],
// DeepSeek V3.2 (OpenRouter)
'deepseek/deepseek-v3.2': [0.56, 1.68],
// GLM 4.7 (OpenRouter/Cerebras)
'z-ai/glm-4.7': [0.55, 2.19],
'zai-glm-4.7': [0.55, 2.19],
// DeepSeek R1 (OpenRouter free)
'deepseek/deepseek-r1-0528:free': [0, 0],
};
class ModelMix {
constructor({ options = {}, config = {}, mix = {} } = {}) {
this.models = [];
this.messages = [];
this.tools = {};
this.toolClient = {};
this.mcp = {};
this.mcpToolsManager = new MCPToolsManager();
this.lastRaw = null;
this.options = {
max_tokens: 8192,
temperature: 1, // 1 --> More creative, 0 --> More deterministic.
...options
};
// Standard Bottleneck configuration
const defaultBottleneckConfig = {
maxConcurrent: 8, // Maximum number of concurrent requests
minTime: 500, // Minimum time between requests (in ms)
};
this.config = {
system: 'You are an assistant.',
max_history: 0, // 0=no history (stateless), N=keep last N messages, -1=unlimited
debug: 0, // 0=silent, 1=minimal, 2=readable summary, 3=full (no truncate), 4=verbose (raw details)
bottleneck: defaultBottleneckConfig,
retry: {
enabled: false,
retries: 2,
baseDelayMs: 500,
maxDelayMs: 5000,
retryableStatusCodes: [...DEFAULT_RETRYABLE_STATUS_CODES]
},
roundRobin: false, // false=fallback mode, true=round robin rotation
...config
}
const freeMix = { openrouter: true, cerebras: true, groq: true, together: false, lambda: false };
this.mix = { ...freeMix, ...mix };
this.limiter = new Bottleneck(this.config.bottleneck);
}
replace(keyValues) {
this.config.replace = { ...this.config.replace, ...keyValues };
return this;
}
static new({ options = {}, config = {}, mix = {} } = {}) {
return new ModelMix({ options, config, mix });
}
new({ options = {}, config = {}, mix = {} } = {}) {
const instance = new ModelMix({ options: { ...this.options, ...options }, config: { ...this.config, ...config }, mix: { ...this.mix, ...mix } });
instance.models = this.models; // Share models array for round-robin rotation
return instance;
}
static formatJSON(obj) {
return inspect(obj, {
depth: null,
colors: true,
maxArrayLength: null,
breakLength: 80,
compact: false
});
}
static formatMessage(message) {
if (typeof message !== 'string') return message;
try {
return ModelMix.formatJSON(JSON.parse(message.trim()));
} catch (e) {
return message;
}
}
// debug logging helpers
static truncate(str, maxLen = 1000) {
if (!str || typeof str !== 'string') return str;
return str.length > maxLen ? str.substring(0, maxLen) + '...' : str;
}
static calculateCost(modelKey, tokens) {
const pricing = MODEL_PRICING[modelKey];
if (!pricing) return null;
const [inputPerMillion, outputPerMillion] = pricing;
return (tokens.input * inputPerMillion / 1_000_000) + (tokens.output * outputPerMillion / 1_000_000);
}
static extractCacheTokens(usage = {}) {
return usage.input_tokens_details?.cached_tokens
|| usage.prompt_tokens_details?.cached_tokens
|| usage.cache_read_input_tokens
|| usage.cachedContentTokenCount
|| usage.cached_content_token_count
|| 0;
}
static formatInputSummary(messages, system, debug = 2) {
const lastMessage = messages[messages.length - 1];
let inputText = '';
if (lastMessage && Array.isArray(lastMessage.content)) {
const textContent = lastMessage.content.find(c => c.type === 'text');
if (textContent) inputText = textContent.text;
} else if (lastMessage && typeof lastMessage.content === 'string') {
inputText = lastMessage.content;
}
const noTruncate = debug >= 3;
const systemStr = noTruncate ? (system || '') : ModelMix.truncate(system, 500);
const inputStr = noTruncate ? inputText : ModelMix.truncate(inputText, 1200);
const msgCount = `(${messages.length} msg${messages.length !== 1 ? 's' : ''})`;
return `| SYSTEM\n${systemStr}\n| INPUT ${msgCount}\n${inputStr}`;
}
static formatOutputSummary(result, debug) {
const parts = [];
const noTruncate = debug >= 3;
if (result.message) {
// Try to parse as JSON for better formatting
try {
const parsed = JSON.parse(result.message.trim());
// If it's valid JSON and debug >= 2, show it formatted
if (debug >= 2) {
parts.push(`| OUTPUT (JSON)\n${ModelMix.formatJSON(parsed)}`);
} else {
parts.push(`| OUTPUT\n${ModelMix.truncate(result.message, 1500)}`);
}
} catch (e) {
parts.push(`| OUTPUT\n${noTruncate ? result.message : ModelMix.truncate(result.message, 1500)}`);
}
}
if (result.think) {
parts.push(`| THINK\n${noTruncate ? result.think : ModelMix.truncate(result.think, 800)}`);
}
if (result.toolCalls && result.toolCalls.length > 0) {
const toolNames = result.toolCalls.map(t => t.function?.name || t.name).join(', ');
parts.push(`| TOOLS\n${toolNames}`);
}
return parts.join('\n');
}
attach(key, provider) {
if (this.models.some(model => model.key === key)) {
return this;
}
if (this.messages.length > 0) {
throw new Error("Cannot add models after message generation has started.");
}
this.models.push({ key, provider });
return this;
}
gpt41({ options = {}, config = {} } = {}) {
return this.attach('gpt-4.1', new MixOpenAI({ options, config }));
}
gpt41mini({ options = {}, config = {} } = {}) {
return this.attach('gpt-4.1-mini', new MixOpenAI({ options, config }));
}
gpt41nano({ options = {}, config = {} } = {}) {
return this.attach('gpt-4.1-nano', new MixOpenAI({ options, config }));
}
gpt5({ options = {}, config = {} } = {}) {
return this.attach('gpt-5', new MixOpenAI({ options, config }));
}
gpt5mini({ options = {}, config = {} } = {}) {
return this.attach('gpt-5-mini', new MixOpenAI({ options, config }));
}
gpt5nano({ options = {}, config = {} } = {}) {
return this.attach('gpt-5-nano', new MixOpenAI({ options, config }));
}
gpt51({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.1', new MixOpenAIResponses({ options, config }));
}
gpt52({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.2', new MixOpenAIResponses({ options, config }));
}
gpt54({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.4', new MixOpenAIResponses({ options, config }));
}
gpt54mini({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.4-mini', new MixOpenAIResponses({ options, config }));
}
gpt54nano({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.4-nano', new MixOpenAIResponses({ options, config }));
}
gpt54pro({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.4-pro', new MixOpenAIResponses({ options, config }));
}
gpt55({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.5', new MixOpenAIResponses({ options, config }));
}
gpt55pro({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.5-pro', new MixOpenAIResponses({ options, config }));
}
gptRealtime({ options = {}, config = {} } = {}) {
return this.attach('gpt-realtime', new MixOpenAIWebSocket({ options, config }));
}
gptRealtimeMini({ options = {}, config = {} } = {}) {
return this.attach('gpt-realtime-mini', new MixOpenAIWebSocket({ options, config }));
}
gpt53codex({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.3-codex', new MixOpenAIResponses({ options, config }));
}
gpt53chat({ options = {}, config = {} } = {}) {
return this.attach('gpt-5.3-chat-latest', new MixOpenAIResponses({ options, config }));
}
gptOss({ options = {}, config = {}, mix = {} } = {}) {
mix = { ...this.mix, ...mix };
if (mix.together) this.attach('openai/gpt-oss-120b', new MixTogether({ options, config }));
if (mix.cerebras) this.attach('gpt-oss-120b', new MixCerebras({ options, config }));
if (mix.groq) this.attach('openai/gpt-oss-120b', new MixGroq({ options, config }));
if (mix.openrouter) this.attach('openai/gpt-oss-120b:free', new MixOpenRouter({ options, config }));
return this;
}
opus47think({ options = {}, config = {} } = {}) {
options = { ...MixAnthropic.thinkingOptions, ...options };
return this.attach('claude-opus-4-7', new MixAnthropic({ options, config }));
}
opus46think({ options = {}, config = {} } = {}) {
options = { ...MixAnthropic.thinkingOptions, ...options };
return this.attach('claude-opus-4-6', new MixAnthropic({ options, config }));
}
opus45think({ options = {}, config = {} } = {}) {
options = { ...MixAnthropic.thinkingOptions, ...options };
return this.attach('claude-opus-4-5-20251101', new MixAnthropic({ options, config }));
}
opus47({ options = {}, config = {} } = {}) {
return this.attach('claude-opus-4-7', new MixAnthropic({ options, config }));
}
opus46({ options = {}, config = {} } = {}) {
return this.attach('claude-opus-4-6', new MixAnthropic({ options, config }));
}
opus45({ options = {}, config = {} } = {}) {
return this.attach('claude-opus-4-5-20251101', new MixAnthropic({ options, config }));
}
opus41({ options = {}, config = {} } = {}) {
return this.attach('claude-opus-4-1-20250805', new MixAnthropic({ options, config }));
}
opus41think({ options = {}, config = {} } = {}) {
options = { ...MixAnthropic.thinkingOptions, ...options };
return this.attach('claude-opus-4-1-20250805', new MixAnthropic({ options, config }));
}
sonnet4({ options = {}, config = {} } = {}) {
return this.attach('claude-sonnet-4-20250514', new MixAnthropic({ options, config }));
}
sonnet4think({ options = {}, config = {} } = {}) {
options = { ...MixAnthropic.thinkingOptions, ...options };
return this.attach('claude-sonnet-4-20250514', new MixAnthropic({ options, config }));
}
sonnet46({ options = {}, config = {} } = {}) {
return this.attach('claude-sonnet-4-6', new MixAnthropic({ options, config }));
}
sonnet46think({ options = {}, config = {} } = {}) {
options = { ...MixAnthropic.thinkingOptions, ...options };
return this.attach('claude-sonnet-4-6', new MixAnthropic({ options, config }));
}
sonnet45({ options = {}, config = {} } = {}) {
return this.attach('claude-sonnet-4-5-20250929', new MixAnthropic({ options, config }));
}
sonnet45think({ options = {}, config = {} } = {}) {
options = { ...MixAnthropic.thinkingOptions, ...options };
return this.attach('claude-sonnet-4-5-20250929', new MixAnthropic({ options, config }));
}
haiku35({ options = {}, config = {} } = {}) {
return this.attach('claude-3-5-haiku-20241022', new MixAnthropic({ options, config }));
}
haiku45({ options = {}, config = {} } = {}) {
return this.attach('claude-haiku-4-5-20251001', new MixAnthropic({ options, config }));
}
haiku45think({ options = {}, config = {} } = {}) {
options = { ...MixAnthropic.thinkingOptions, ...options };
return this.attach('claude-haiku-4-5-20251001', new MixAnthropic({ options, config }));
}
gemini25flash({ options = {}, config = {} } = {}) {
return this.attach('gemini-2.5-flash', new MixGoogle({ options, config }));
}
gemini31pro({ options = {}, config = {} } = {}) {
return this.attach('gemini-3.1-pro-preview', new MixGoogle({ options, config }));
}
gemini3pro({ options = {}, config = {} } = {}) {
return this.attach('gemini-3-pro-preview', new MixGoogle({ options, config }));
}
gemini3flash({ options = {}, config = {} } = {}) {
return this.attach('gemini-3-flash-preview', new MixGoogle({ options, config }));
}
gemini35flash({ options = {}, config = {} } = {}) {
return this.attach('gemini-3.5-flash', new MixGoogle({ options, config }));
}
gemini31flashLite({ options = {}, config = {} } = {}) {
return this.attach('gemini-3.1-flash-lite-preview', new MixGoogle({ options, config }));
}
gemini25pro({ options = {}, config = {} } = {}) {
return this.attach('gemini-2.5-pro', new MixGoogle({ options, config }));
}
sonarPro({ options = {}, config = {} } = {}) {
return this.attach('sonar-pro', new MixPerplexity({ options, config }));
}
sonar({ options = {}, config = {} } = {}) {
return this.attach('sonar', new MixPerplexity({ options, config }));
}
grok43({ options = {}, config = {} } = {}) {
return this.attach('grok-4.3', new MixGrok({ options, config }));
}
grok420multiAgent({ options = {}, config = {} } = {}) {
return this.attach('grok-4.20-multi-agent-0309', new MixGrok({ options, config }));
}
grok420think({ options = {}, config = {} } = {}) {
return this.attach('grok-4.20-0309-reasoning', new MixGrok({ options, config }));
}
grok420({ options = {}, config = {} } = {}) {
return this.attach('grok-4.20-0309-non-reasoning', new MixGrok({ options, config }));
}
grok41think({ options = {}, config = {} } = {}) {
return this.attach('grok-4-1-fast-reasoning', new MixGrok({ options, config }));
}
grok41({ options = {}, config = {} } = {}) {
return this.attach('grok-4-1-fast-non-reasoning', new MixGrok({ options, config }));
}
qwen3({ options = {}, config = {}, mix = { together: true, cerebras: false } } = {}) {
if (mix.together) this.attach('Qwen/Qwen3-235B-A22B-fp8-tput', new MixTogether({ options, config }));
if (mix.cerebras) this.attach('qwen-3-32b', new MixCerebras({ options, config }));
return this;
}
qwen36plus({ options = {}, config = {}, mix = { fireworks: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.fireworks) this.attach('accounts/fireworks/models/qwen3p6-plus', new MixFireworks({ options, config }));
if (mix.together) this.attach('Qwen/Qwen3.6-Plus', new MixTogether({ options, config }));
return this;
}
scout({ options = {}, config = {}, mix = {} } = {}) {
mix = { ...this.mix, ...mix };
if (mix.groq) this.attach('meta-llama/llama-4-scout-17b-16e-instruct', new MixGroq({ options, config }));
if (mix.together) this.attach('meta-llama/Llama-4-Scout-17B-16E-Instruct', new MixTogether({ options, config }));
if (mix.cerebras) this.attach('llama-4-scout-17b-16e-instruct', new MixCerebras({ options, config }));
return this;
}
maverick({ options = {}, config = {}, mix = {} } = {}) {
mix = { ...this.mix, ...mix };
if (mix.together) this.attach('meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8', new MixTogether({ options, config }));
if (mix.lambda) this.attach('llama-4-maverick-17b-128e-instruct-fp8', new MixLambda({ options, config }));
return this;
}
deepseekR1({ options = {}, config = {}, mix = {} } = {}) {
mix = { ...this.mix, ...mix };
if (mix.groq) this.attach('deepseek-r1-distill-llama-70b', new MixGroq({ options, config }));
if (mix.together) this.attach('deepseek-ai/DeepSeek-R1', new MixTogether({ options, config }));
if (mix.cerebras) this.attach('deepseek-r1-distill-llama-70b', new MixCerebras({ options, config }));
if (mix.openrouter) this.attach('deepseek/deepseek-r1-0528:free', new MixOpenRouter({ options, config }));
return this;
}
hermes3({ options = {}, config = {}, mix = {} } = {}) {
mix = { ...this.mix, ...mix };
if (mix.lambda) this.attach('Hermes-3-Llama-3.1-405B-FP8', new MixLambda({ options, config }));
if (mix.openrouter) this.attach('nousresearch/hermes-3-llama-3.1-405b:free', new MixOpenRouter({ options, config }));
return this;
}
kimiK26think({ options = {}, config = {}, mix = { fireworks: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.fireworks) this.attach('accounts/fireworks/models/kimi-k2p6', new MixFireworks({ options, config }));
if (mix.openrouter) this.attach('moonshotai/kimi-k2.6', new MixOpenRouter({ options, config }));
if (mix.together) this.attach('moonshotai/Kimi-K2.6', new MixTogether({ options, config }));
return this;
}
kimiK25think({ options = {}, config = {}, mix = { together: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.together) this.attach('moonshotai/Kimi-K2.5', new MixTogether({ options, config }));
if (mix.fireworks) this.attach('accounts/fireworks/models/kimi-k2p5', new MixFireworks({ options, config }));
if (mix.openrouter) this.attach('moonshotai/kimi-k2.5', new MixOpenRouter({ options, config }));
return this;
}
lmstudio(model = 'lmstudio', { options = {}, config = {} } = {}) {
return this.attach(model, new MixLMStudio({ options, config }));
}
minimaxM25({ options = {}, config = {}, mix = { minimax: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.minimax) this.attach('MiniMax-M2.5', new MixMiniMax({ options, config }));
if (mix.fireworks) this.attach('fireworks/minimax-m2p5', new MixFireworks({ options, config }));
return this;
}
minimaxM27({ options = {}, config = {}, mix = { openrouter: true, minimax: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.nvidia) this.attach('minimaxai/minimax-m2.7', new MixNVIDIA({ options, config }));
if (mix.openrouter) return this.attach('minimax/minimax-m2.7', new MixOpenRouter({ options, config }));
if (mix.minimax) return this.attach('MiniMax-M2.7', new MixMiniMax({ options, config }));
if (mix.together) return this.attach('MiniMaxAI/MiniMax-M2.7', new MixTogether({ options, config }));
return this;
}
mimo25({ options = {}, config = {}, mix = { openrouter: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.openrouter) this.attach('xiaomi/mimo-v2.5', new MixOpenRouter({ options, config }));
if (mix.mimo) this.attach('mimo-v2.5', new MixMiMo({ options, config }));
return this;
}
mimo25pro({ options = {}, config = {}, mix = { openrouter: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.openrouter) this.attach('xiaomi/mimo-v2.5-pro', new MixOpenRouter({ options, config }));
if (mix.mimo) this.attach('mimo-v2.5-pro', new MixMiMo({ options, config }));
return this;
}
deepseekV4Pro({ options = {}, config = {}, mix = { fireworks: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.nvidia) this.attach('deepseek-ai/deepseek-v4-pro', new MixNVIDIA({ options, config }));
if (mix.fireworks) this.attach('accounts/fireworks/models/deepseek-v4-pro', new MixFireworks({ options, config }));
if (mix.openrouter) this.attach('deepseek/deepseek-v4-pro', new MixOpenRouter({ options, config }));
if (mix.together) this.attach('deepseek-ai/DeepSeek-V4-Pro', new MixTogether({ options, config }));
return this;
}
deepseekV4Flash({ options = {}, config = {}, mix = { nvidia: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.nvidia) this.attach('deepseek-ai/deepseek-v4-flash', new MixNVIDIA({ options, config }));
return this;
}
GLM51({ options = {}, config = {}, mix = { fireworks: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.nvidia) this.attach('z-ai/glm-5.1', new MixNVIDIA({ options, config }));
if (mix.fireworks) this.attach('accounts/fireworks/models/glm-5p1', new MixFireworks({ options, config }));
if (mix.openrouter) this.attach('z-ai/glm-5.1', new MixOpenRouter({ options, config }));
if (mix.together) this.attach('zai-org/GLM-5.1', new MixTogether({ options, config }));
return this;
}
GLM5({ options = {}, config = {}, mix = { fireworks: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.fireworks) this.attach('fireworks/glm-5', new MixFireworks({ options, config }));
return this;
}
GLM47({ options = {}, config = {}, mix = { fireworks: true } } = {}) {
mix = { ...this.mix, ...mix };
if (mix.fireworks) this.attach('accounts/fireworks/models/glm-4p7', new MixFireworks({ options, config }));
if (mix.openrouter) this.attach('z-ai/glm-4.7', new MixOpenRouter({ options, config }));
if (mix.cerebras) this.attach('zai-glm-4.7', new MixCerebras({ options, config }));
return this;
}
addText(text, { role = "user" } = {}) {
const content = [{
type: "text",
text
}];
this.messages.push({ role, content });
return this;
}
addTextFromFile(filePath, { role = "user" } = {}) {
const content = this.readFile(filePath);
this.addText(content, { role });
return this;
}
setSystem(text) {
this.config.system = text;
return this;
}
setSystemFromFile(filePath) {
const content = this.readFile(filePath);
this.setSystem(content);
return this;
}
addImageFromBuffer(buffer, { role = "user" } = {}) {
this.messages.push({
role,
content: [{
type: "image",
source: {
type: "buffer",
data: buffer
}
}]
});
return this;
}
addImage(filePath, { role = "user" } = {}) {
const absolutePath = path.resolve(filePath);
if (!fs.existsSync(absolutePath)) {
throw new Error(`Image file not found: ${filePath}`);
}
this.messages.push({
role,
content: [{
type: "image",
source: {
type: "file",
data: filePath
}
}]
});
return this;
}
addImageFromUrl(url, { role = "user" } = {}) {
let source;
if (url.startsWith('data:')) {
// Parse data URL: data:image/jpeg;base64,/9j/4AAQ...
const match = url.match(/^data:([^;]+);base64,(.+)$/);
if (match) {
source = {
type: "base64",
media_type: match[1],
data: match[2]
};
} else {
throw new Error('Invalid data URL format');
}
} else {
source = {
type: "url",
data: url
};
}
this.messages.push({
role,
content: [{
type: "image",
source
}]
});
return this;
}
async processImages() {
for (let i = 0; i < this.messages.length; i++) {
const message = this.messages[i];
if (!Array.isArray(message.content)) continue;
for (let j = 0; j < message.content.length; j++) {
const content = message.content[j];
if (content.type !== 'image' || content.source.type === 'base64') continue;
try {
let buffer, mimeType;
switch (content.source.type) {
case 'url':
const response = await fetchBinaryResponse(content.source.data);
buffer = response.data;
mimeType = response.headers['content-type'];
break;
case 'file':
buffer = this.readFile(content.source.data, { encoding: null });
break;
case 'buffer':
buffer = content.source.data;
break;
}
// Detect mimeType if not provided
if (!mimeType) {
if (typeof detectFileTypeFromBuffer !== 'function') {
throw new Error('file-type module does not expose a buffer detector');
}
const detectedType = await detectFileTypeFromBuffer(buffer);
if (!detectedType || !detectedType.mime.startsWith('image/')) {
throw new Error(`Invalid image - unable to detect valid image format`);
}
mimeType = detectedType.mime;
}
// Update the content with processed image
message.content[j] = {
type: "image",
source: {
type: "base64",
media_type: mimeType,
data: buffer.toString('base64')
}
};
} catch (error) {
console.error(`Error processing image:`, error);
// Remove failed image from content
message.content.splice(j, 1);
j--;
}
}
}
}
async message() {
let raw = await this.execute({ options: { stream: false } });
return raw.message;
}
async json(schemaExample = null, schemaDescription = {}, { type = 'json_object', addExample = false, addSchema = true, addNote = false } = {}) {
let isArrayWrap = false;
if (Array.isArray(schemaExample)) {
isArrayWrap = true;
schemaExample = { out: schemaExample };
if (Array.isArray(schemaDescription)) {
schemaDescription = { out: schemaDescription };
}
}
let options = {
response_format: { type },
stream: false,
}
// Apply template replacements to system before adding extra instructions
let systemWithReplacements = this._template(this.config.system, this.config.replace);
let config = {
system: systemWithReplacements,
}
if (schemaExample) {
config.schema = generateJsonSchema(schemaExample, schemaDescription);
if (addSchema) {
config.system += "\n\nOutput JSON Schema: \n```\n" + JSON.stringify(config.schema) + "\n```";
}
if (addExample) {
config.system += "\n\nOutput JSON Example: \n```\n" + JSON.stringify(schemaExample) + "\n```";
}
if (addNote) {
config.system += "\n\nOutput JSON Escape: double quotes, backslashes, and control characters inside JSON strings.\nEnsure the output contains no comments.";
}
}
const { message } = await this.execute({ options, config });
const parsed = JSON.parse(this._extractBlock(message));
return isArrayWrap ? parsed.out : parsed;
}
_extractBlock(response) {
const block = response.match(/```(?:\w+)?\s*([\s\S]*?)```/);
return block ? block[1].trim() : response.trim();
}
async block({ addSystemExtra = true } = {}) {
// Apply template replacements to system before adding extra instructions
let systemWithReplacements = this._template(this.config.system, this.config.replace);
let config = {
system: systemWithReplacements,
}
if (addSystemExtra) {
config.system += "\nReturn the result of the task between triple backtick block code tags ```";
}
const { message } = await this.execute({ options: { stream: false }, config });
return this._extractBlock(message);
}
async raw() {
return this.execute({ options: { stream: false } });
}
async stream(callback) {
this.streamCallback = callback;
return this.execute({ options: { stream: true } });
}
replaceKeyFromFile(key, filePath) {
try {
const content = this.readFile(filePath);
this.replace({ [key]: this._template(content, this.config.replace) });
} catch (error) {
// Gracefully handle file read errors without throwing
log.warn(`replaceKeyFromFile: ${error.message}`);
}
return this;
}
_template(input, replace) {
if (!replace) return input;
for (const k in replace) {
input = input.split(/([¿?¡!,"';:\(\)\.\s])/).map(x => x === k ? replace[k] : x).join("");
}
return input;
}
groupByRoles(messages) {
return messages.reduce((acc, currentMessage, index) => {
// Don't group tool messages or assistant messages with tool_calls
// Each tool response must be separate with its own tool_call_id
const shouldNotGroup = currentMessage.role === 'tool' ||
currentMessage.tool_calls ||
currentMessage.tool_call_id;
if (index === 0 || currentMessage.role !== messages[index - 1].role || shouldNotGroup) {
// acc.push({
// role: currentMessage.role,
// content: currentMessage.content
// });
acc.push(currentMessage);
} else {
acc[acc.length - 1].content = acc[acc.length - 1].content.concat(currentMessage.content);
}
return acc;
}, []);
}
applyTemplate() {
if (!this.config.replace) return;
this.config.system = this._template(this.config.system, this.config.replace);
this.messages = this.messages.map(message => {
if (message.content instanceof Array) {
message.content = message.content.map(content => {
if (content.type === 'text') {
content.text = this._template(content.text, this.config.replace);
}
return content;
});
}
return message;
});
}
async prepareMessages() {
await this.processImages();
this.applyTemplate();
// Smart message slicing based on max_history:
// 0 = no history (stateless), N = keep last N messages, -1 = unlimited
if (this.config.max_history > 0) {
let sliceStart = Math.max(0, this.messages.length - this.config.max_history);
// If we're slicing into the middle of a tool interaction,
// backtrack to include the full sequence (user → assistant/tool_calls → tool results)
while (sliceStart > 0 && sliceStart < this.messages.length) {
const msg = this.messages[sliceStart];
if (msg.role === 'tool' || (msg.role === 'assistant' && msg.tool_calls)) {
sliceStart--;
} else {
break;
}
}
this.messages = this.messages.slice(sliceStart);
}
// max_history = -1: unlimited, no slicing
// max_history = 0: no history, messages only contain what was added since last call
this.messages = this.groupByRoles(this.messages);
this.options.messages = this.messages;
}
readFile(filePath, { encoding = 'utf8' } = {}) {
try {
const absolutePath = path.resolve(filePath);
return fs.readFileSync(absolutePath, { encoding });
} catch (error) {
if (error.code === 'ENOENT') {
throw new Error(`File not found: ${filePath}`);
} else if (error.code === 'EACCES') {
throw new Error(`Permission denied: ${filePath}`);
} else {
throw new Error(`Error reading file ${filePath}: ${error.message}`);
}
}
}
/**
 * Send the current conversation to the configured models and return the
 * first successful result.
 *
 * Models are tried in the order they appear in `this.models`; when one
 * fails (after any configured retries) the next one is used as a fallback.
 * If the result contains tool calls, the tools are run via
 * `processToolCalls`, their outputs are appended to `this.messages`, and
 * `execute` is invoked again with the same overrides.
 *
 * @param {object} [params]
 * @param {object} [params.config={}] - Per-call config overrides. Merged
 *   over `this.config` and over each provider's own config; the `retry`
 *   sub-object is merged key by key. Retry keys read here: `enabled`,
 *   `retries`, `baseDelayMs`, `maxDelayMs`, `retryableStatusCodes`.
 * @param {object} [params.options={}] - Per-call request option overrides,
 *   applied after `this.options`, the provider options and the tool options.
 * @returns {Promise<object>} The result returned by the provider's
 *   `create`; when it has a `tokens` object, `tokens.cost` and
 *   `tokens.speed` are added to it.
 * @throws {Error} When no models are configured, when there are no
 *   messages after `prepareMessages`, or (the last model's error) when
 *   every model fails.
 */
async execute({ config = {}, options = {} } = {}) {
if (!this.models || this.models.length === 0) {
throw new Error("No models specified. Use methods like .gpt5(), .sonnet4() first.");
}
// Everything below runs as a single job scheduled on this.limiter.
return this.limiter.schedule(async () => {
await this.prepareMessages();
if (this.messages.length === 0) {
throw new Error("No user messages have been added. Use addText(prompt), addTextFromFile(filePath), addImage(filePath), or addImageFromUrl(url) to add a prompt.");
}
// Merge config to get final roundRobin value and retry settings
const finalConfig = {
...this.config,
...config,
retry: {
...(this.config.retry || {}),
...(config.retry || {})
}
};
// Try all models in order (first is primary, rest are fallbacks)
// The snapshot is taken before the rotation below, so this call still uses the current order.
const modelsToTry = this.models.map((model, index) => ({ model, index }));
// Round robin: rotate models array AFTER using current for next request
// This mutates this.models in place (first entry moves to the end).
if (finalConfig.roundRobin && this.models.length > 1) {
const firstModel = this.models.shift();
this.models.push(firstModel);
}
let lastError = null;
for (let i = 0; i < modelsToTry.length; i++) {
const { model: currentModel, index: originalIndex } = modelsToTry[i];
const currentModelKey = currentModel.key;
const providerInstance = currentModel.provider;
const optionsTools = providerInstance.getOptionsTools(this.tools);
// Create clean copies for each provider to avoid contamination
// Later spreads win: per-call options > tool options > provider options > instance options.
// `model` is always forced to the key of the model being tried.
const currentOptions = {
...this.options,
...providerInstance.options,
...optionsTools,
...options,
model: currentModelKey
};
// `config` is spread again after the provider config so per-call overrides
// take precedence over provider-level settings; retry is merged key by key.
const currentConfig = {
...finalConfig,
...providerInstance.config,
...config,
retry: {
...(finalConfig.retry || {}),
...(providerInstance.config?.retry || {}),
...(config.retry || {})
}
};
// debug >= 1: print a one-line header per model; debug >= 2 also prints an input summary.
if (currentConfig.debug >= 1) {
const isPrimary = i === 0;
const prefix = isPrimary ? '→' : '↻';
const suffix = isPrimary
? (currentConfig.roundRobin ? ` (round-robin #${originalIndex + 1})` : '')
: ' (fallback)';
// Extract provider name from class name (e.g., "MixOpenRouter" -> "openrouter")
const providerName = providerInstance.constructor.name.replace(/^Mix/, '').toLowerCase();
const header = `\n${prefix} [${providerName}:${currentModelKey}] #${originalIndex + 1}${suffix}`;
if (currentConfig.debug >= 2) {
console.log(`${header}\n${ModelMix.formatInputSummary(this.messages, currentConfig.system, currentConfig.debug)}`);
} else {
console.log(header);
}
}
try {
// Hand the instance-level stream callback to the provider only when streaming is requested.
if (currentOptions.stream && this.streamCallback) {
providerInstance.streamCallback = this.streamCallback;
}
const retryConfig = currentConfig.retry || {};
// Retries are opt-in: without a truthy retry.enabled the request is attempted once.
const retries = retryConfig.enabled ? Math.max(0, retryConfig.retries || 0) : 0;
const baseDelayMs = Math.max(0, retryConfig.baseDelayMs || 0);
// The cap is never allowed to be lower than the base delay.
const maxDelayMs = Math.max(baseDelayMs, retryConfig.maxDelayMs || baseDelayMs);
const retryableStatusCodes = new Set(
Array.isArray(retryConfig.retryableStatusCodes) && retryConfig.retryableStatusCodes.length > 0
? retryConfig.retryableStatusCodes
: DEFAULT_RETRYABLE_STATUS_CODES
);
// `attempt` counts the failed tries so far for this model (zero-based).
let attempt = 0;
let result;
let startTime = 0;
// Retry loop for the current model: exits via `break` on success or via `throw` when giving up.
while (true) {
try {
// Reset on every try so elapsedMs below measures only the successful attempt.
startTime = Date.now();
result = await providerInstance.create({ options: currentOptions, config: currentConfig });
break;
} catch (attemptError) {
// getErrorStatusCode yields null when the error carries no status code;
// such errors are not retried with the default status code list.
const statusCode = getErrorStatusCode(attemptError);
const isRetryable = retryableStatusCodes.has(statusCode);
const canRetry = attempt < retries && isRetryable;
if (!canRetry) {
// Propagates to the outer catch, which moves on to the next model.
throw attemptError;
}
if (currentConfig.debug >= 1) {
// attempt + 1 tries have been made, so the upcoming one is number attempt + 2.
const nextAttempt = attempt + 2;
const totalAttempts = retries + 1;
console.log(`↺ Retrying [${currentModelKey}] due to status ${statusCode} (${nextAttempt}/${totalAttempts})`);
}
// Exponential backoff: baseDelayMs * 2^attempt, capped at maxDelayMs.
const delay = Math.min(baseDelayMs * Math.pow(2, attempt), maxDelayMs);
await sleep(delay);
attempt += 1;
}
}
const elapsedMs = Date.now() - startTime;
if (result.tokens) {
result.tokens.cost = ModelMix.calculateCost(currentModelKey, result.tokens);
const elapsedSec = elapsedMs / 1000;
// Output tokens per second, rounded; 0 when no measurable time elapsed.
result.tokens.speed = elapsedSec > 0 ? Math.round(result.tokens.output / elapsedSec) : 0;
}
// Tool-call round trip: record the assistant turn, run the tools, append their
// results to the history, then ask the model again.
if (result.toolCalls && result.toolCalls.length > 0) {
if (result.message) {
if (result.signature) {
// With a signature only the thinking block is stored here; result.message
// itself is not added to the history in this branch.
this.messages.push({
role: "assistant", content: [{
type: "thinking",
thinking: result.think,
signature: result.signature
}]
});
} else {
this.addText(result.message, { role: "assistant" });
}
}
this.messages.push({ role: "assistant", content: null, tool_calls: result.toolCalls });
const toolResults = await this.processToolCalls(result.toolCalls);
for (const toolResult of toolResults) {
this.messages.push({
role: 'tool',
tool_call_id: toolResult.tool_call_id,
name: toolResult.name,
content: toolResult.content
});
}
// NOTE(review): this re-enters this.limiter.schedule while the current job is
// still running; if the limiter caps concurrency, nested calls could stall
// waiting for a free slot. Confirm against the limiter configuration.
// The promise is returned without `await`, so a rejection from the nested call
// is not handled by the catch below (no fallback to the next model here).
return this.execute({ options, config });
}
// debug level 1: Just success indicator
// Strict equality: levels >= 2 print the richer summary below instead.
if (currentConfig.debug === 1) {
console.log(`✓ Success`);
}
// debug level 2: Readable summary of output
if (currentConfig.debug >= 2) {
const tokenInfo = result.tokens
? ` ${result.tokens.input} → ${result.tokens.output} tok`
+ (result.tokens.cached ? ` (cached:${result.tokens.cached})` : '')
+ (result.tokens.speed ? ` | ${result.tokens.speed} t/s` : '')
+ (result.tokens.cost != null ? ` $${result.tokens.cost.toFixed(4)}` : '')
: '';
console.log(`✓${tokenInfo}\n${ModelMix.formatOutputSummary(result, currentConfig.debug).trim()}`);
}
// debug level 4 (verbose): Full response details
if (currentConfig.debug >= 4) {
if (result.response) {
console.log('\n[RAW RESPONSE]');
console.log(ModelMix.formatJSON(result.response));
}
if (result.message) {
console.log('\n[FULL MESSAGE]');
console.log(ModelMix.formatMessage(result.message));
}
if (result.think) {
console.log('\n[FULL THINKING]');
console.log(result.think);
}
}
if (currentConfig.debug >= 1) console.log('');
this.lastRaw = result;
// Manage conversation history based on max_history setting
// NOTE(review): this reads this.config.max_history, not the merged per-call
// config, so a max_history passed in `config` is not consulted here.
if (this.config.max_history === 0) {
// Stateless: clear messages so next call starts fresh
this.messages = [];
} else if (result.message) {
// Persist assistant response for multi-turn conversations
if (result.signature) {
// Keep the signed thinking block together with the text in a single assistant turn.
this.messages.push({
role: "assistant", content: [{
type: "thinking",
thinking: result.think,
signature: result.signature
}, {
type: "text",
text: result.message
}]
});
} else {
this.addText(result.message, { role: "assistant" });
}
}
return result;
} catch (error) {
// Reached when the provider call gives up (non-retryable error or retries
// exhausted) or when any awaited step in the try block above throws.
lastError = error;
log.warn(`Model ${currentModelKey} failed (Attempt #${i + 1}/${modelsToTry.length}).`);
if (error.message) log.warn(`Error: ${error.message}`);
if (error.statusCode) log.warn(`Status Code: ${error.statusCode}`);
if (error.details) log.warn(`Details:\n${ModelMix.formatJSON(error.details)}`);
if (i === modelsToTry.length - 1) {
// No fallback left: surface the error from the last model tried.
console.error(`All ${modelsToTry.length} model(s) failed. Throwing last error from ${currentModelKey}.`);
throw lastError;
} else {
const nextModelKey = modelsToTry[i + 1].model.key;
log.info(`-> Proceeding to next model: ${nextModelKey}`);
}
}
}
// Only reached if the loop finishes without returning or throwing;
// presumably a defensive guard, since the last iteration rethrows on failure.
log.error("Fallback logic completed without success or throwing the final error.");
throw lastError || new Error("Failed to get response from any model, and no specific error was caught.");
});
}
async processToolCalls(toolCalls) {
const result = []
for (const toolCall of toolCalls) {
// Handle different tool call formats more robustly
let toolName, toolArgs, toolId;
try {
if (toolCall.function) {
// Formato OpenAI/normalizado
toolName = toolCall.function.name;
toolArgs = typeof toolCall.function.arguments === 'string'
? JSON.parse(toolCall.function.arguments)
: toolCall.function.arguments;
toolId = toolCall.id;
} else if (toolCall.name) {