UNPKG

lynkr

Version:

Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.

1,409 lines (1,234 loc) 91.5 kB
const crypto = require("crypto"); const config = require("../config"); const http = require("http"); const https = require("https"); const { withRetry } = require("./retry"); const { getCircuitBreakerRegistry } = require("./circuit-breaker"); const { getMetricsCollector } = require("../observability/metrics"); const { getHealthTracker } = require("../observability/health-tracker"); const { createBulkhead } = require("./resilience"); const logger = require("../logger"); const { STANDARD_TOOLS, STANDARD_TOOL_NAMES } = require("./standard-tools"); const { convertAnthropicToolsToOpenRouter } = require("./openrouter-utils"); const { detectModelFamily } = require("./bedrock-utils"); const { getGPTSystemPromptAddendum } = require("./gpt-utils"); const telemetry = require("../routing/telemetry"); const { scoreResponseQuality } = require("../routing/quality-scorer"); const { getLatencyTracker } = require("../routing/latency-tracker"); if (typeof fetch !== "function") { throw new Error("Node 18+ is required for the built-in fetch API."); } // Z.AI request bulkhead - limit concurrent requests to avoid rate limiting // Configurable via ZAI_MAX_CONCURRENT env var (default: 2) const zaiMaxConcurrent = parseInt(process.env.ZAI_MAX_CONCURRENT || '2', 10); const zaiSemaphore = createBulkhead({ maxConcurrent: zaiMaxConcurrent, maxQueue: 50 }); logger.info({ maxConcurrent: zaiMaxConcurrent }, "Z.AI bulkhead initialized"); // HTTP connection pooling for better performance // Increased maxSockets for high-concurrency team deployments (50+ devs) const httpAgent = new http.Agent({ keepAlive: true, maxSockets: 200, maxFreeSockets: 20, timeout: 120000, keepAliveMsecs: 30000, }); const httpsAgent = new https.Agent({ keepAlive: true, maxSockets: 200, maxFreeSockets: 20, timeout: 120000, keepAliveMsecs: 30000, }); async function performJsonRequest(url, { headers = {}, body }, providerLabel) { const agent = url.startsWith('https:') ? 
httpsAgent : httpAgent; const isStreaming = body.stream === true; // Streaming requests can't be retried, so handle them directly if (isStreaming) { const response = await fetch(url, { method: "POST", headers, body: JSON.stringify(body), agent, }); logger.debug({ provider: providerLabel, status: response.status, streaming: true, }, `${providerLabel} API streaming response`); if (!response.ok) { const errorText = await response.text(); logger.warn({ provider: providerLabel, status: response.status, error: errorText.substring(0, 200), }, `${providerLabel} API streaming error`); } return { ok: response.ok, status: response.status, stream: response.body, // Return the readable stream contentType: response.headers.get("content-type"), headers: response.headers, }; } // Non-streaming requests use retry logic return withRetry(async () => { const response = await fetch(url, { method: "POST", headers, body: JSON.stringify(body), agent, }); const text = await response.text(); logger.debug({ provider: providerLabel, status: response.status, responseLength: text.length, }, `${providerLabel} API response`); let json; try { json = JSON.parse(text); } catch { json = null; } const result = { ok: response.ok, status: response.status, json, text, contentType: response.headers.get("content-type"), headers: response.headers, }; // Log errors for retry logic if (!response.ok) { logger.warn({ provider: providerLabel, status: response.status, error: json?.error || text.substring(0, 200), }, `${providerLabel} API error`); } return result; }, { maxRetries: config.apiRetry?.maxRetries || 3, initialDelay: config.apiRetry?.initialDelay || 1000, maxDelay: config.apiRetry?.maxDelay || 30000, }); } async function invokeDatabricks(body) { if (!config.databricks?.url) { throw new Error("Databricks configuration is missing required URL."); } // Create a copy of body to avoid mutating the original const databricksBody = { ...body }; // Inject standard tools if client didn't send any (passthrough 
mode) if (!Array.isArray(databricksBody.tools) || databricksBody.tools.length === 0) { databricksBody.tools = STANDARD_TOOLS; logger.debug({ injectedToolCount: STANDARD_TOOLS.length, injectedToolNames: STANDARD_TOOL_NAMES, reason: "Client did not send tools (passthrough mode)" }, "=== INJECTING STANDARD TOOLS (Databricks) ==="); } // Convert Anthropic format tools to OpenAI format (Databricks uses OpenAI format) if (Array.isArray(databricksBody.tools) && databricksBody.tools.length > 0) { // Check if tools are already in OpenAI format (have type: "function") const alreadyConverted = databricksBody.tools[0]?.type === "function"; if (!alreadyConverted) { databricksBody.tools = convertAnthropicToolsToOpenRouter(databricksBody.tools); logger.debug({ convertedToolCount: databricksBody.tools.length, convertedToolNames: databricksBody.tools.map(t => t.function?.name), }, "Converted tools to OpenAI format for Databricks"); } else { logger.debug({ toolCount: databricksBody.tools.length, toolNames: databricksBody.tools.map(t => t.function?.name), }, "Tools already in OpenAI format, skipping conversion"); } } const headers = { Authorization: `Bearer ${config.databricks.apiKey}`, "Content-Type": "application/json", }; return performJsonRequest(config.databricks.url, { headers, body: databricksBody }, "Databricks"); } async function invokeAzureAnthropic(body) { if (!config.azureAnthropic?.endpoint) { throw new Error("Azure Anthropic endpoint is not configured."); } // Inject standard tools if client didn't send any (passthrough mode) if (!Array.isArray(body.tools) || body.tools.length === 0) { body.tools = STANDARD_TOOLS; logger.debug({ injectedToolCount: STANDARD_TOOLS.length, injectedToolNames: STANDARD_TOOL_NAMES, reason: "Client did not send tools (passthrough mode)" }, "=== INJECTING STANDARD TOOLS (Azure Anthropic) ==="); } const headers = { "Content-Type": "application/json", "x-api-key": config.azureAnthropic.apiKey, "anthropic-version": config.azureAnthropic.version ?? 
"2023-06-01", }; return performJsonRequest( config.azureAnthropic.endpoint, { headers, body }, "Azure Anthropic", ); } async function invokeOllama(body) { if (!config.ollama?.endpoint) { throw new Error("Ollama endpoint is not configured."); } const { checkOllamaToolSupport, hasAnthropicEndpoint, convertAnthropicToolsToOllama } = require("./ollama-utils"); const modelName = body._suggestionModeModel || body._tierModel || config.ollama.model; // Detect whether Ollama has the native Anthropic Messages API (v0.14.0+) const useAnthropicApi = await hasAnthropicEndpoint(config.ollama.endpoint); // Check if model supports tools FIRST (before wasteful injection) const supportsTools = await checkOllamaToolSupport(modelName); const injectToolsOllama = process.env.INJECT_TOOLS_OLLAMA !== "false"; // Determine tools to send let toolsToSend = body.tools; let toolsInjected = false; if (!supportsTools) { toolsToSend = null; } else if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; } // Consolidated tool injection log const toolCount = (supportsTools && Array.isArray(toolsToSend)) ? toolsToSend.length : 0; let logMessage; if (!supportsTools) { logMessage = `Tools not supported (0 tools)`; } else if (toolsInjected) { logMessage = `injected ${toolCount} tools`; } else if (Array.isArray(toolsToSend) && toolsToSend.length > 0) { logMessage = `Using client-provided tools (${toolCount} tools)`; } else if (!injectToolsOllama) { logMessage = `Tool injection disabled (0 tools)`; } else { logMessage = `No tools (0 tools)`; } logger.debug({ model: modelName, apiMode: useAnthropicApi ? "anthropic" : "legacy", toolCount, toolsInjected, supportsTools, toolNames: (Array.isArray(toolsToSend) && toolsToSend.length > 0) ? 
toolsToSend.map(t => t.name) : [] }, `=== Ollama STANDARD TOOLS INJECTION for ${config.ollama.model} === ${logMessage}`); // ---- Anthropic-native path (Ollama v0.14.0+) ---- if (useAnthropicApi) { const endpoint = `${config.ollama.endpoint}/v1/messages`; const headers = { "Content-Type": "application/json", "anthropic-version": "2023-06-01", }; // Build body with only valid Anthropic Messages API fields const ollamaBody = { model: modelName, messages: body.messages, max_tokens: body.max_tokens || 16384, stream: body.stream ?? false, }; if (body.system) ollamaBody.system = body.system; if (body.temperature !== undefined) ollamaBody.temperature = body.temperature; if (body.top_p !== undefined) ollamaBody.top_p = body.top_p; if (body.top_k !== undefined) ollamaBody.top_k = body.top_k; if (body.stop_sequences) ollamaBody.stop_sequences = body.stop_sequences; if (body.tool_choice) ollamaBody.tool_choice = body.tool_choice; if (body.metadata) ollamaBody.metadata = body.metadata; // Tools (already Anthropic format — no conversion needed) if (supportsTools && Array.isArray(toolsToSend) && toolsToSend.length > 0) { ollamaBody.tools = toolsToSend; } if (config.ollama.keepAlive !== undefined) { const keepAlive = config.ollama.keepAlive; ollamaBody.keep_alive = /^-?\d+$/.test(keepAlive) ? 
parseInt(keepAlive, 10) : keepAlive; logger.debug({ keepAlive: ollamaBody.keep_alive }, "Ollama keep_alive configured"); } return performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama"); } // ---- Legacy path (Ollama < v0.14.0, /api/chat with OpenAI format) ---- const endpoint = `${config.ollama.endpoint}/api/chat`; const headers = { "Content-Type": "application/json" }; // Convert Anthropic messages to Ollama format (content blocks → strings) const convertedMessages = []; if (body.system && typeof body.system === "string" && body.system.trim().length > 0) { convertedMessages.push({ role: "system", content: body.system.trim() }); } (body.messages || []).forEach(msg => { let content = msg.content; if (Array.isArray(content)) { content = content .filter(block => block.type === 'text') .map(block => block.text || '') .join('\n'); } convertedMessages.push({ role: msg.role, content: content || '' }); }); // Deduplicate consecutive messages with same role const deduplicated = []; let lastRole = null; for (const msg of convertedMessages) { if (msg.role === lastRole) { logger.debug({ skippedRole: msg.role, contentPreview: msg.content.substring(0, 50) }, 'Ollama: Skipping duplicate consecutive message with same role'); continue; } deduplicated.push(msg); lastRole = msg.role; } const ollamaBody = { model: modelName, messages: deduplicated, stream: body.stream ?? false, options: { temperature: body.temperature ?? 0.7, num_predict: body.max_tokens ?? 16384, top_p: body.top_p ?? 1.0, }, }; if (config.ollama.keepAlive !== undefined) { const keepAlive = config.ollama.keepAlive; ollamaBody.keep_alive = /^-?\d+$/.test(keepAlive) ? 
parseInt(keepAlive, 10) : keepAlive; logger.debug({ keepAlive: ollamaBody.keep_alive }, "Ollama keep_alive configured"); } // Tools need conversion to OpenAI function-calling format for legacy endpoint if (supportsTools && Array.isArray(toolsToSend) && toolsToSend.length > 0) { ollamaBody.tools = convertAnthropicToolsToOllama(toolsToSend); } return performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama"); } async function invokeOpenRouter(body) { if (!config.openrouter?.endpoint || !config.openrouter?.apiKey) { throw new Error("OpenRouter endpoint or API key is not configured."); } const { convertAnthropicToolsToOpenRouter, convertAnthropicMessagesToOpenRouter } = require("./openrouter-utils"); const endpoint = config.openrouter.endpoint; const headers = { "Authorization": `Bearer ${config.openrouter.apiKey}`, "Content-Type": "application/json", "HTTP-Referer": "https://localhost:8080", "X-Title": "Claude-Ollama-Proxy" }; // Convert messages and handle system message const messages = convertAnthropicMessagesToOpenRouter(body.messages || []); // Anthropic uses separate 'system' field, OpenAI needs it as first message if (body.system) { messages.unshift({ role: "system", content: body.system }); } const openRouterBody = { model: body._suggestionModeModel || body._tierModel || config.openrouter.model, messages, temperature: body.temperature ?? 0.7, max_tokens: body.max_tokens ?? 16384, top_p: body.top_p ?? 1.0, stream: body.stream ?? 
false }; // Add tools - inject standard tools if client didn't send any (passthrough mode) let toolsToSend = body.tools; let toolsInjected = false; if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.debug({ injectedToolCount: STANDARD_TOOLS.length, injectedToolNames: STANDARD_TOOL_NAMES, reason: "Client did not send tools (passthrough mode)" }, "=== INJECTING STANDARD TOOLS (OpenRouter) ==="); } if (Array.isArray(toolsToSend) && toolsToSend.length > 0) { openRouterBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend); logger.debug({ toolCount: toolsToSend.length, toolNames: toolsToSend.map(t => t.name), toolsInjected }, "Sending tools to OpenRouter"); } return performJsonRequest(endpoint, { headers, body: openRouterBody }, "OpenRouter"); } function detectAzureFormat(url) { if (url.includes("/openai/responses")) return "responses"; if (url.includes("/models/")) return "models"; if (url.includes("/openai/deployments")) return "deployments"; throw new Error("Unknown Azure OpenAI endpoint"); } async function invokeAzureOpenAI(body) { if (!config.azureOpenAI?.endpoint || !config.azureOpenAI?.apiKey) { throw new Error("Azure OpenAI endpoint or API key is not configured."); } const { convertAnthropicToolsToOpenRouter, convertAnthropicMessagesToOpenRouter } = require("./openrouter-utils"); // Azure OpenAI URL format const endpoint = config.azureOpenAI.endpoint; const format = detectAzureFormat(endpoint); const headers = { "Content-Type": "application/json" }; // Azure AI Foundry (services.ai.azure.com) uses Bearer auth // Standard Azure OpenAI (openai.azure.com) uses api-key header if (endpoint.includes("services.ai.azure.com")) { headers["Authorization"] = `Bearer ${config.azureOpenAI.apiKey}`; } else { headers["api-key"] = config.azureOpenAI.apiKey; } // Convert messages and handle system message const 
messages = convertAnthropicMessagesToOpenRouter(body.messages || []); // Anthropic uses separate 'system' field, OpenAI needs it as first message if (body.system) { messages.unshift({ role: "system", content: body.system }); } // System prompt injection disabled - breaks model response // Tool guidance now provided via tool descriptions instead const azureDeployment = body._suggestionModeModel || body._tierModel || config.azureOpenAI.deployment || ""; const isGpt5 = /gpt-5/i.test(azureDeployment); const maxTokensKey = isGpt5 ? "max_completion_tokens" : "max_tokens"; const azureBody = { messages, temperature: body.temperature ?? 0.3, [maxTokensKey]: Math.min(body.max_tokens ?? 16384, 16384), top_p: body.top_p ?? 1.0, stream: false, model: azureDeployment }; // Add tools - inject standard tools if client didn't send any (passthrough mode) let toolsToSend = body.tools; let toolsInjected = false; if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.debug({ injectedToolCount: STANDARD_TOOLS.length, injectedToolNames: STANDARD_TOOL_NAMES, reason: "Client did not send tools (passthrough mode)" }, "=== INJECTING STANDARD TOOLS ==="); } if (Array.isArray(toolsToSend) && toolsToSend.length > 0) { azureBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend); azureBody.parallel_tool_calls = true; // Enable parallel tool calls azureBody.tool_choice = "auto"; // Explicitly enable tool use (helps GPT models understand they should use tools) logger.debug({ toolCount: toolsToSend.length, toolNames: toolsToSend.map(t => t.name), toolsInjected, hasSystemMessage: !!body.system, messageCount: messages.length, temperature: azureBody.temperature, sampleTool: azureBody.tools[0] // Log first tool for inspection }, "=== SENDING TOOLS TO AZURE OPENAI ==="); } logger.debug({ endpoint, hasTools: !!azureBody.tools, toolCount: 
azureBody.tools?.length || 0, temperature: azureBody.temperature, max_tokens: azureBody.max_tokens, tool_choice: azureBody.tool_choice }, "=== AZURE OPENAI REQUEST ==="); if (format === "deployments" || format === "models") { return performJsonRequest(endpoint, { headers, body: azureBody }, "Azure OpenAI"); } else if (format === "responses") { // Responses API uses 'input' instead of 'messages' and flat tool format // Convert tools from Chat Completions format to Responses API format const responsesTools = azureBody.tools?.map(tool => { if (tool.type === "function" && tool.function) { // Flatten: {type:"function", function:{name,description,parameters}} -> {type:"function", name, description, parameters} return { type: "function", name: tool.function.name, description: tool.function.description, parameters: tool.function.parameters }; } return tool; }); // Convert messages to Responses API input format // Responses API uses different structure for tool calls and results const responsesInput = []; // Track function call IDs for matching with outputs const pendingCallIds = []; // Detect if this is a continuation request (has tool results) // Azure content filter triggers on full system prompt in continuations // Check for: // 1. tool_result blocks in user messages (Anthropic format) // 2. tool messages (OpenAI format) // 3. assistant messages with tool_use or tool_calls (indicates prior tool invocation) // 4. 
Flattened continuation pattern from orchestrator (contains "IMPORTANT: Focus on") const hasToolResults = (body.messages || []).some(msg => { // Check for Anthropic format tool_result in user messages if (msg.role === "user" && Array.isArray(msg.content)) { if (msg.content.some(block => block.type === "tool_result")) return true; } // Check for OpenAI format tool messages if (msg.role === "tool") return true; // Check for assistant messages with tool_use (Anthropic) or tool_calls (OpenAI) // If there's a prior tool use, this is a continuation if (msg.role === "assistant") { if (Array.isArray(msg.content)) { if (msg.content.some(block => block.type === "tool_use")) return true; } if (msg.tool_calls && msg.tool_calls.length > 0) return true; } return false; }) || azureBody.messages.some(msg => { // Also check converted messages for flattened continuation pattern // The orchestrator flattens tool results into user message with this marker if (msg.role === "user" && typeof msg.content === "string") { if (msg.content.includes("IMPORTANT: Focus on and respond ONLY to my most recent request")) return true; } return false; }); if (hasToolResults) { logger.debug({ hasToolResults: true, originalMessageCount: (body.messages || []).length, convertedMessageCount: azureBody.messages.length, messageRoles: (body.messages || []).map(m => m.role), }, "=== CONTINUATION REQUEST DETECTED - using minimal system prompt to avoid Azure content filter ==="); } else { logger.debug({ hasToolResults: false, originalMessageCount: (body.messages || []).length, messageRoles: (body.messages || []).map(m => m.role), }, "Initial request - using full system prompt"); } // Helper function to strip <system-reminder> tags and meta-instructions from content // Azure's jailbreak filter triggers on these instructions in continuation requests const stripSystemReminders = (content) => { if (!content || typeof content !== 'string') return content; // Remove <system-reminder>...</system-reminder> blocks let 
cleaned = content.replace(/<system-reminder>[\s\S]*?<\/system-reminder>/gi, ''); // Remove the continuation marker that orchestrator adds cleaned = cleaned.replace(/---\s*IMPORTANT:\s*Focus on and respond ONLY to my most recent request[^\n]*/gi, ''); // Trim whitespace return cleaned.trim(); }; for (const msg of azureBody.messages) { if (msg.role === "system") { // For continuation requests, use minimal system prompt to avoid content filter // Azure's jailbreak detection triggers on security-related text in continuations if (hasToolResults) { responsesInput.push({ type: "message", role: "developer", content: "You are a helpful coding assistant. Continue helping the user based on the tool results." }); } else { // Initial request - use full system prompt responsesInput.push({ type: "message", role: "developer", content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content) }); } } else if (msg.role === "user") { // Check if content contains tool_result blocks (Anthropic format) if (Array.isArray(msg.content)) { for (const block of msg.content) { if (block.type === "tool_result") { // Convert tool_result to function_call_output // Use tool_use_id if available, otherwise pop from pending call IDs const callId = block.tool_use_id || pendingCallIds.shift() || `call_${Date.now()}`; responsesInput.push({ type: "function_call_output", call_id: callId, output: typeof block.content === 'string' ? block.content : JSON.stringify(block.content || "") }); } else if (block.type === "text") { // For continuation requests, strip system-reminder tags to avoid jailbreak filter const textContent = hasToolResults ? stripSystemReminders(block.text || "") : (block.text || ""); if (textContent) { // Only add if there's content after stripping responsesInput.push({ type: "message", role: "user", content: textContent }); } } } } else { // For continuation requests, strip system-reminder tags to avoid jailbreak filter let userContent = typeof msg.content === 'string' ? 
msg.content : JSON.stringify(msg.content); if (hasToolResults) { userContent = stripSystemReminders(userContent); } if (userContent) { // Only add if there's content after stripping responsesInput.push({ type: "message", role: "user", content: userContent }); } } } else if (msg.role === "assistant") { // Assistant messages - handle tool_calls (OpenAI format) and tool_use blocks (Anthropic format) if (msg.tool_calls && msg.tool_calls.length > 0) { // OpenAI format: tool_calls array for (const tc of msg.tool_calls) { const callId = tc.id || `call_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; pendingCallIds.push(callId); responsesInput.push({ type: "function_call", call_id: callId, name: tc.function?.name || tc.name, arguments: typeof tc.function?.arguments === 'string' ? tc.function.arguments : JSON.stringify(tc.function?.arguments || {}) }); } } // Handle content - could be string, array with tool_use blocks, or array with text blocks if (Array.isArray(msg.content)) { // Anthropic format: content is array of blocks for (const block of msg.content) { if (block.type === "tool_use") { const callId = block.id || `call_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`; pendingCallIds.push(callId); responsesInput.push({ type: "function_call", call_id: callId, name: block.name, arguments: typeof block.input === 'string' ? 
block.input : JSON.stringify(block.input || {}) }); } else if (block.type === "text" && block.text) { responsesInput.push({ type: "message", role: "assistant", content: block.text }); } } } else if (msg.content) { // String content responsesInput.push({ type: "message", role: "assistant", content: msg.content }); } } else if (msg.role === "tool") { // Tool results become function_call_output // Use tool_call_id if available, otherwise pop from pending call IDs const callId = msg.tool_call_id || pendingCallIds.shift() || `call_${Date.now()}`; responsesInput.push({ type: "function_call_output", call_id: callId, output: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content) }); } } const responsesBody = { input: responsesInput, model: azureBody.model, max_output_tokens: azureBody.max_tokens, tools: responsesTools, tool_choice: azureBody.tool_choice, stream: false }; logger.debug({ format: "responses", inputCount: responsesBody.input?.length, model: responsesBody.model, hasTools: !!responsesBody.tools }, "Using Responses API format"); const result = await performJsonRequest(endpoint, { headers, body: responsesBody }, "Azure OpenAI Responses"); // Convert Responses API response to Chat Completions format if (result.ok && result.json?.output) { const outputArray = result.json.output || []; // Find message output (contains text content) const messageOutput = outputArray.find(o => o.type === "message"); const textContent = messageOutput?.content?.find(c => c.type === "output_text")?.text || ""; // Find function_call outputs (tool calls are separate items in output array) const rawToolCalls = outputArray .filter(o => o.type === "function_call") .map(tc => ({ id: tc.call_id || tc.id || `call_${Date.now()}`, type: "function", function: { name: tc.name, arguments: typeof tc.arguments === 'string' ? 
tc.arguments : JSON.stringify(tc.arguments || {}) } })); // Deduplicate identical tool calls (GPT sometimes returns multiple identical calls) const seenSignatures = new Set(); const toolCalls = rawToolCalls.filter(tc => { const signature = `${tc.function.name}:${tc.function.arguments}`; if (seenSignatures.has(signature)) { logger.warn({ toolName: tc.function.name, signature: signature.substring(0, 100), }, "Filtered duplicate tool call from GPT response"); return false; } seenSignatures.add(signature); return true; }); if (rawToolCalls.length !== toolCalls.length) { logger.debug({ originalCount: rawToolCalls.length, dedupedCount: toolCalls.length, removed: rawToolCalls.length - toolCalls.length, }, "Deduplicated identical tool calls from single response"); } logger.debug({ outputTypes: outputArray.map(o => o.type), hasMessage: !!messageOutput, toolCallCount: toolCalls.length, toolCallNames: toolCalls.map(tc => tc.function.name) }, "Parsing Responses API output"); // Convert to Chat Completions format result.json = { id: result.json.id, object: "chat.completion", created: result.json.created_at, model: result.json.model, choices: [{ index: 0, message: { role: "assistant", content: textContent, tool_calls: toolCalls.length > 0 ? toolCalls : undefined }, finish_reason: toolCalls.length > 0 ? 
"tool_calls" : "stop" }], usage: result.json.usage }; logger.debug({ convertedContent: textContent?.substring(0, 100), hasToolCalls: toolCalls.length > 0, toolCallCount: toolCalls.length }, "Converted Responses API to Chat Completions format"); // Now convert from Chat Completions format to Anthropic format const anthropicJson = convertOpenAIToAnthropic(result.json); logger.debug({ anthropicContentTypes: anthropicJson.content?.map(c => c.type), stopReason: anthropicJson.stop_reason }, "Converted to Anthropic format"); return { ok: result.ok, status: result.status, json: anthropicJson, text: JSON.stringify(anthropicJson), contentType: "application/json", headers: result.headers, }; } return result; } else { throw new Error(`Unsupported Azure OpenAI endpoint format: ${format}`); } } async function invokeOpenAI(body) { if (!config.openai?.apiKey) { throw new Error("OpenAI API key is not configured."); } const { convertAnthropicToolsToOpenRouter, convertAnthropicMessagesToOpenRouter } = require("./openrouter-utils"); const endpoint = config.openai.endpoint || "https://api.openai.com/v1/chat/completions"; const headers = { "Authorization": `Bearer ${config.openai.apiKey}`, "Content-Type": "application/json", }; // Add organization header if configured if (config.openai.organization) { headers["OpenAI-Organization"] = config.openai.organization; } // Convert messages and handle system message const messages = convertAnthropicMessagesToOpenRouter(body.messages || []); // Anthropic uses separate 'system' field, OpenAI needs it as first message if (body.system) { messages.unshift({ role: "system", content: body.system }); } // System prompt injection disabled - breaks model response const openAIBody = { model: body._suggestionModeModel || body._tierModel || config.openai.model || "gpt-4o", messages, temperature: body.temperature ?? 0.7, max_tokens: body.max_tokens ?? 16384, top_p: body.top_p ?? 1.0, stream: body.stream ?? 
false }; // Add tools - inject standard tools if client didn't send any (passthrough mode) let toolsToSend = body.tools; let toolsInjected = false; if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) { // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.debug({ injectedToolCount: STANDARD_TOOLS.length, injectedToolNames: STANDARD_TOOL_NAMES, reason: "Client did not send tools (passthrough mode)" }, "=== INJECTING STANDARD TOOLS (OpenAI) ==="); } if (Array.isArray(toolsToSend) && toolsToSend.length > 0) { openAIBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend); openAIBody.parallel_tool_calls = false; // Disable parallel tool calls - GPT often makes duplicate calls openAIBody.tool_choice = "auto"; // Let the model decide when to use tools logger.debug({ toolCount: toolsToSend.length, toolNames: toolsToSend.map(t => t.name), toolsInjected }, "=== SENDING TOOLS TO OPENAI ==="); } logger.debug({ endpoint, model: openAIBody.model, hasTools: !!openAIBody.tools, toolCount: openAIBody.tools?.length || 0, temperature: openAIBody.temperature, max_tokens: openAIBody.max_tokens, }, "=== OPENAI REQUEST ==="); return performJsonRequest(endpoint, { headers, body: openAIBody }, "OpenAI"); } async function invokeLlamaCpp(body) { if (!config.llamacpp?.endpoint) { throw new Error("llama.cpp endpoint is not configured."); } const { convertAnthropicToolsToOpenRouter, convertAnthropicMessagesToOpenRouter } = require("./openrouter-utils"); const endpoint = `${config.llamacpp.endpoint}/v1/chat/completions`; const headers = { "Content-Type": "application/json", }; // Add API key if configured (for secured llama.cpp servers) if (config.llamacpp.apiKey) { headers["Authorization"] = `Bearer ${config.llamacpp.apiKey}`; } // Convert messages to OpenAI format const messages = convertAnthropicMessagesToOpenRouter(body.messages || []); // Handle system message if (body.system) { 
messages.unshift({ role: "system", content: body.system }); } // FIX: Deduplicate consecutive messages with same role (llama.cpp rejects this) const deduplicated = []; let lastRole = null; for (const msg of messages) { if (msg.role === lastRole) { logger.debug({ skippedRole: msg.role, contentPreview: typeof msg.content === 'string' ? msg.content.substring(0, 50) : JSON.stringify(msg.content).substring(0, 50) }, 'llama.cpp: Skipping duplicate consecutive message with same role'); continue; } deduplicated.push(msg); lastRole = msg.role; } if (deduplicated.length !== messages.length) { logger.debug({ originalCount: messages.length, deduplicatedCount: deduplicated.length, removed: messages.length - deduplicated.length, messageRoles: messages.map(m => m.role).join(' → '), deduplicatedRoles: deduplicated.map(m => m.role).join(' → ') }, 'llama.cpp: Removed consecutive duplicate roles from message sequence'); } const llamacppBody = { messages: deduplicated, temperature: body.temperature ?? 0.7, max_tokens: body.max_tokens ?? 16384, top_p: body.top_p ?? 1.0, stream: body.stream ?? 
false }; // Inject standard tools if client didn't send any let toolsToSend = body.tools; let toolsInjected = false; const injectToolsLlamacpp = process.env.INJECT_TOOLS_LLAMACPP !== "false"; if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) { toolsToSend = STANDARD_TOOLS; toolsInjected = true; logger.debug({ injectedToolCount: STANDARD_TOOLS.length, injectedToolNames: STANDARD_TOOL_NAMES, reason: "Client did not send tools (passthrough mode)" }, "=== INJECTING STANDARD TOOLS (llama.cpp) ==="); } else if (!injectToolsLlamacpp) { logger.debug({}, "Tool injection disabled for llama.cpp (INJECT_TOOLS_LLAMACPP=false)"); } if (Array.isArray(toolsToSend) && toolsToSend.length > 0) { llamacppBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend); llamacppBody.tool_choice = "auto"; logger.debug({ toolCount: toolsToSend.length, toolNames: toolsToSend.map(t => t.name), toolsInjected }, "=== SENDING TOOLS TO LLAMA.CPP ==="); } logger.debug({ endpoint, hasTools: !!llamacppBody.tools, toolCount: llamacppBody.tools?.length || 0, temperature: llamacppBody.temperature, max_tokens: llamacppBody.max_tokens, messageCount: llamacppBody.messages?.length || 0, messageRoles: llamacppBody.messages?.map(m => m.role).join(' → '), messages: llamacppBody.messages?.map((m, i) => ({ index: i, role: m.role, hasContent: !!m.content, contentPreview: typeof m.content === 'string' ? 
/**
 * Forward an Anthropic-style request to an LM Studio server through its
 * OpenAI-compatible `/v1/chat/completions` endpoint.
 *
 * When the client sends no tools, the standard tool set is injected so the
 * model can still issue tool calls.
 *
 * @param {object} body - Anthropic-format request. Reads `messages`, `system`,
 *   `tools`, `temperature`, `max_tokens`, `top_p` and `stream`.
 * @returns {Promise<object>} Whatever `performJsonRequest` resolves with.
 * @throws {Error} When `config.lmstudio.endpoint` is not configured.
 */
async function invokeLMStudio(body) {
  if (!config.lmstudio?.endpoint) {
    throw new Error("LM Studio endpoint is not configured.");
  }

  const {
    convertAnthropicToolsToOpenRouter,
    convertAnthropicMessagesToOpenRouter,
  } = require("./openrouter-utils");

  // Strip trailing slashes so "http://host:1234/" does not become "...//v1/...".
  const baseUrl = String(config.lmstudio.endpoint).replace(/\/+$/, "");
  const endpoint = `${baseUrl}/v1/chat/completions`;

  const headers = {
    "Content-Type": "application/json",
  };

  // Add API key if configured (for secured LM Studio servers)
  if (config.lmstudio.apiKey) {
    headers["Authorization"] = `Bearer ${config.lmstudio.apiKey}`;
  }

  // Convert messages to OpenAI format
  const messages = convertAnthropicMessagesToOpenRouter(body.messages || []);

  // Handle system message. Anthropic allows `system` to be either a string or
  // an array of text blocks; collapse the array form into a single string
  // (same flattening invokeZai applies) instead of forwarding raw blocks.
  if (body.system) {
    const systemText = Array.isArray(body.system)
      ? body.system
          .map((part) => (typeof part === "string" ? part : part?.text ?? ""))
          .join("\n")
      : body.system;
    if (systemText) {
      messages.unshift({ role: "system", content: systemText });
    }
  }

  const lmstudioBody = {
    messages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 16384,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false,
  };

  // Inject standard tools if client didn't send any
  let toolsToSend = body.tools;
  let toolsInjected = false;
  if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
    toolsToSend = STANDARD_TOOLS;
    toolsInjected = true;
    logger.debug({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOL_NAMES,
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (LM Studio) ===");
  }

  // STANDARD_TOOLS itself could be empty, so re-check before attaching.
  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    lmstudioBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend);
    lmstudioBody.tool_choice = "auto";
    logger.debug({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map((t) => t.name),
      toolsInjected
    }, "=== SENDING TOOLS TO LM STUDIO ===");
  }

  logger.debug({
    endpoint,
    hasTools: !!lmstudioBody.tools,
    toolCount: lmstudioBody.tools?.length || 0,
    temperature: lmstudioBody.temperature,
    max_tokens: lmstudioBody.max_tokens,
  }, "=== LM STUDIO REQUEST ===");

  return performJsonRequest(endpoint, { headers, body: lmstudioBody }, "LM Studio");
}
/**
 * Bedrock Converse stop reasons that have a direct Anthropic `stop_reason`
 * equivalent. Anything else is reported as "end_turn".
 */
const BEDROCK_STOP_REASONS = Object.freeze({
  end_turn: "end_turn",
  tool_use: "tool_use",
  max_tokens: "max_tokens",
  stop_sequence: "stop_sequence",
});

/** Collapse Anthropic text content (string or array of blocks) into one string. */
function flattenBedrockText(content) {
  if (!Array.isArray(content)) {
    return content;
  }
  return content
    .map((part) => (typeof part === "string" ? part : part?.text || part?.content || ""))
    .join("\n");
}

/** Convert the `content` of an Anthropic tool_result block into Converse toolResult content. */
function toBedrockToolResultContent(content) {
  if (content == null) {
    return [];
  }
  if (typeof content === "string") {
    return [{ text: content }];
  }
  const items = Array.isArray(content) ? content : [content];
  return items.map((item) => {
    if (typeof item === "string") {
      return { text: item };
    }
    if (typeof item?.text === "string") {
      return { text: item.text };
    }
    // Non-text payloads are serialized so the information is not dropped.
    return { text: JSON.stringify(item) ?? "" };
  });
}

/** Convert Anthropic message content (string or block array) into Converse content blocks. */
function toBedrockContentBlocks(content) {
  if (!Array.isArray(content)) {
    return [{ text: content ?? "" }];
  }
  return content.map((block) => {
    if (typeof block === "string") {
      return { text: block };
    }
    if (block?.type === "tool_use") {
      return {
        toolUse: {
          toolUseId: block.id,
          name: block.name,
          input: block.input ?? {},
        },
      };
    }
    if (block?.type === "tool_result") {
      const toolResult = {
        toolUseId: block.tool_use_id,
        content: toBedrockToolResultContent(block.content),
      };
      // Only flag failures; `status` is left off for ordinary results.
      if (block.is_error) {
        toolResult.status = "error";
      }
      return { toolResult };
    }
    return { text: block?.text || block?.content || "" };
  });
}

/**
 * Send an Anthropic-style request to AWS Bedrock via the Converse API using
 * Bearer-token authentication, and translate the reply back to Anthropic format.
 *
 * @param {object} body - Anthropic-format request. Reads `messages`, `system`,
 *   `tools`, `max_tokens`, `temperature`, `top_p`, `stream` (logged only) and
 *   `_tierModel` (overrides `config.bedrock.modelId`).
 * @returns {Promise<{ok: true, status: 200, json: object, actualProvider: "bedrock", modelFamily: *}>}
 *   `json` is the Anthropic-format message.
 * @throws {Error} When the API key is missing, the HTTP response is not ok, or
 *   the response carries no output message.
 */
async function invokeBedrock(body) {
  // 1. Validate Bearer token
  if (!config.bedrock?.apiKey) {
    throw new Error(
      "AWS Bedrock requires AWS_BEDROCK_API_KEY (Bearer token). " +
      "Generate from AWS Console → Bedrock → API Keys, then set AWS_BEDROCK_API_KEY in your .env file."
    );
  }

  const bearerToken = config.bedrock.apiKey;
  logger.debug({ authMethod: "Bearer Token" }, "=== BEDROCK AUTH ===");

  // 2. Inject standard tools if the client sent none
  let toolsToSend = body.tools;
  if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
    toolsToSend = STANDARD_TOOLS;
    logger.debug({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOL_NAMES,
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Bedrock) ===");
  }

  // 3. Detect model family
  const modelId = body._tierModel || config.bedrock.modelId;
  const modelFamily = detectModelFamily(modelId);

  logger.debug({
    modelId,
    modelFamily,
    hasTools: !!toolsToSend,
    toolCount: toolsToSend?.length || 0,
    streaming: body.stream || false,
  }, "=== BEDROCK REQUEST (FETCH) ===");

  // 4. Convert to Bedrock Converse API format.
  // Converse only allows 'user' and 'assistant' roles in `messages`, so
  // system-role entries are pulled out and sent through the `system` field.
  const sourceMessages = Array.isArray(body.messages) ? body.messages : [];
  const systemMessages = sourceMessages.filter((msg) => msg.role === "system");

  const converseBody = {
    messages: sourceMessages
      .filter((msg) => msg.role !== "system")
      .map((msg) => ({
        role: msg.role,
        content: toBedrockContentBlocks(msg.content),
      })),
  };

  // System prompt: the Anthropic `system` field wins; otherwise every
  // system-role message is joined so none of them is silently lost.
  const systemText = body.system
    ? flattenBedrockText(body.system)
    : systemMessages.map((msg) => flattenBedrockText(msg.content)).join("\n");
  if (systemText) {
    converseBody.system = [{ text: systemText }];
  }

  // Inference config: forward each knob independently so temperature/top_p
  // are not lost when max_tokens is absent.
  const inferenceConfig = {};
  if (body.max_tokens) {
    inferenceConfig.maxTokens = body.max_tokens;
  }
  if (body.temperature != null) {
    inferenceConfig.temperature = body.temperature;
  }
  if (body.top_p != null) {
    inferenceConfig.topP = body.top_p;
  }
  if (Object.keys(inferenceConfig).length > 0) {
    converseBody.inferenceConfig = inferenceConfig;
  }

  // Tools
  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    converseBody.toolConfig = {
      tools: toolsToSend.map((tool) => ({
        toolSpec: {
          name: tool.name,
          description: tool.description,
          inputSchema: { json: tool.input_schema ?? { type: "object", properties: {} } },
        },
      })),
    };
  }

  // 5. Construct Bedrock Converse API endpoint.
  // The model id is a path parameter; ARNs (inference profiles, provisioned
  // throughput) contain "/" and ":" and must be percent-encoded.
  const path = `/model/${encodeURIComponent(modelId)}/converse`;
  const host = `bedrock-runtime.${config.bedrock.region}.amazonaws.com`;
  const endpoint = `https://${host}${path}`;

  logger.debug({
    endpoint,
    authMethod: "Bearer Token",
    hasSystem: !!converseBody.system,
    hasTools: !!converseBody.toolConfig,
    messageCount: converseBody.messages.length
  }, "=== BEDROCK CONVERSE API REQUEST ===");

  // 6. Prepare request headers with Bearer token
  const requestHeaders = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${bearerToken}`
  };

  // 7. Make the Converse API request
  try {
    // The body is passed as an object; performJsonRequest serializes it.
    const response = await performJsonRequest(endpoint, {
      headers: requestHeaders,
      body: converseBody
    }, "Bedrock");

    if (!response.ok) {
      const errorText = response.text; // property, not a method
      logger.error({
        status: response.status,
        error: errorText
      }, "=== BEDROCK CONVERSE API ERROR ===");
      throw new Error(`Bedrock Converse API failed: ${response.status} ${errorText}`);
    }

    // Already parsed by performJsonRequest (property, not a method)
    const converseResponse = response.json;
    const message = converseResponse?.output?.message;
    if (!message || !Array.isArray(message.content)) {
      throw new Error("Bedrock Converse API returned no output message");
    }

    logger.debug({
      stopReason: converseResponse.stopReason,
      inputTokens: converseResponse.usage?.inputTokens || 0,
      outputTokens: converseResponse.usage?.outputTokens || 0,
      hasToolUse: message.content.some((c) => c.toolUse)
    }, "=== BEDROCK CONVERSE API RESPONSE ===");

    // Convert Converse API response to Anthropic format
    const anthropicResponse = {
      id: `bedrock-${Date.now()}`,
      type: "message",
      role: message.role,
      model: modelId,
      content: message.content.map((item) => {
        if (typeof item.text === "string") {
          return { type: "text", text: item.text };
        }
        if (item.toolUse) {
          return {
            type: "tool_use",
            id: item.toolUse.toolUseId,
            name: item.toolUse.name,
            input: item.toolUse.input
          };
        }
        return item;
      }),
      stop_reason: BEDROCK_STOP_REASONS[converseResponse.stopReason] ?? "end_turn",
      usage: {
        input_tokens: converseResponse.usage?.inputTokens || 0,
        output_tokens: converseResponse.usage?.outputTokens || 0,
      },
    };

    return {
      ok: true,
      status: 200,
      json: anthropicResponse,
      actualProvider: "bedrock",
      modelFamily,
    };
  } catch (e) {
    logger.error({
      error: e.message,
      modelId,
      region: config.bedrock.region,
      endpoint,
      stack: e.stack
    }, "=== BEDROCK CONVERSE API ERROR ===");
    throw e;
  }
}
systemContent = Array.isArray(body.system) ? body.system.map(s => s.text || s).join("\n") : body.system; } // Filter out any system role messages (Z.AI doesn't support system role) // and collect their content const filteredMessages = []; for (const msg of messages) { if (msg.role === "system") { // Append system message content to systemContent if (msg.content) { systemContent = systemContent ? `${systemContent}\n${msg.content}` : msg.content; } } else { filteredMessages.push(msg); } } messages = filteredMessages; // Prepend system content to first user message ONLY if no tools // When tools are present, system instructions can confuse tool calling const hasTools = Array.isArray(body.tools) && body.tools.length > 0; if (systemContent && messages.length > 0 && !hasTools) { const firstUserIdx = messages.findIndex(m => m.role === "user"); if (firstUserIdx >= 0) { const firstUser = messages[firstUserIdx]; firstUser.content = `[System Instructions]\n${systemContent}\n\n[User Message]\n${firstUser.content}`; } else { // No user message, add system as user message messages.unshift({ role: "user", content: systemContent }); } } else if (systemContent && !hasTools) { // No messages at all, add system as user messages.push({ role: "user", content: systemContent }); } // Convert tools if present let tools = undefined; if (Array.isArray(body.tools) && body.tools.length > 0) { tools = convertAnthropicToolsToOpenRouter(body.tools); } zaiBody = { model: mappedModel, messages, max_tokens: body.max_tokens || 16384, temperature: body.temperature ?? 
0.7, stream: body.stream, }; // Only add tools if present if (tools && tools.length > 0) { zaiBody.tools = tools; // Use "auto" to let the model decide when to use tools // "required" was forcing tools even for simple greetings zaiBody.tool_choice = "auto"; // Also enable parallel tool calls zaiBody.parallel_tool_calls = false; // Disable parallel tool calls - GPT often makes duplicate calls } headers = { "Content-Type": "application/json", "Authorization": `Bearer ${config.zai.apiKey}`, }; } else { // Anthropic format endpoint zaiBody = { ...body }; zaiBody.model = mappedModel; // Inject standard tools if client didn't send any (passthrough mode) if (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) { zaiBody.tools = STANDARD_TOOLS; logger.debug