lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
1,409 lines (1,234 loc) • 91.5 kB
JavaScript
const crypto = require("crypto");
const config = require("../config");
const http = require("http");
const https = require("https");
const { withRetry } = require("./retry");
const { getCircuitBreakerRegistry } = require("./circuit-breaker");
const { getMetricsCollector } = require("../observability/metrics");
const { getHealthTracker } = require("../observability/health-tracker");
const { createBulkhead } = require("./resilience");
const logger = require("../logger");
const { STANDARD_TOOLS, STANDARD_TOOL_NAMES } = require("./standard-tools");
const { convertAnthropicToolsToOpenRouter } = require("./openrouter-utils");
const {
detectModelFamily
} = require("./bedrock-utils");
const { getGPTSystemPromptAddendum } = require("./gpt-utils");
const telemetry = require("../routing/telemetry");
const { scoreResponseQuality } = require("../routing/quality-scorer");
const { getLatencyTracker } = require("../routing/latency-tracker");
// Fail fast at module load: every provider call in this file goes through the
// global fetch(), which only exists on Node 18 and later.
if (typeof globalThis.fetch !== "function") {
  throw new Error("Node 18+ is required for the built-in fetch API.");
}
// Z.AI request bulkhead - limit concurrent requests to avoid rate limiting.
// Configurable via the ZAI_MAX_CONCURRENT env var (default: 2). A value that does
// not parse to a positive integer (unset, "", "abc", "0", "-3") falls back to the
// default rather than handing NaN or a non-positive limit to the bulkhead.
const zaiMaxConcurrent = (() => {
  const parsed = Number.parseInt(process.env.ZAI_MAX_CONCURRENT ?? "", 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : 2;
})();
const zaiSemaphore = createBulkhead({ maxConcurrent: zaiMaxConcurrent, maxQueue: 50 });
logger.info({ maxConcurrent: zaiMaxConcurrent }, "Z.AI bulkhead initialized");
// Keep-alive connection pools, one per protocol, sharing the same tuning.
// maxSockets is sized for high-concurrency team deployments (50+ devs).
const [httpAgent, httpsAgent] = [http, https].map(
  (transport) =>
    new transport.Agent({
      keepAlive: true,
      maxSockets: 200,
      maxFreeSockets: 20,
      timeout: 120000,
      keepAliveMsecs: 30000,
    })
);
/**
 * POST a JSON payload to a provider endpoint and return a normalised result.
 *
 * Streaming requests (`body.stream === true`) are sent exactly once, because a
 * stream cannot be replayed. All other requests are wrapped in `withRetry`
 * using the limits from `config.apiRetry`.
 *
 * @param {string} url - Absolute endpoint URL; its scheme selects the agent.
 * @param {{ headers?: object, body: object }} options - Request headers and the
 *   payload to serialise with JSON.stringify.
 * @param {string} providerLabel - Provider name used in log fields and messages.
 * @returns {Promise<object>} For streaming requests
 *   `{ ok, status, stream, contentType, headers }`; otherwise
 *   `{ ok, status, json, text, contentType, headers }`, where `json` is null
 *   when the response text is not valid JSON.
 */
async function performJsonRequest(url, { headers = {}, body }, providerLabel) {
  const agent = url.startsWith('https:') ? httpsAgent : httpAgent;
  // NOTE(review): Node's built-in fetch is backed by undici, which is configured
  // through a `dispatcher` rather than an http(s).Agent - confirm that passing
  // `agent` here actually applies the connection pool.
  const send = () =>
    fetch(url, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      agent,
    });

  // Streaming requests can't be retried, so handle them directly
  if (body?.stream === true) {
    const response = await send();
    logger.debug({
      provider: providerLabel,
      status: response.status,
      streaming: true,
    }, `${providerLabel} API streaming response`);

    if (!response.ok) {
      // Read the error text from a clone: consuming `response` itself would
      // leave the `stream` returned below already used up and unreadable.
      const errorText = await response.clone().text();
      logger.warn({
        provider: providerLabel,
        status: response.status,
        error: errorText.substring(0, 200),
      }, `${providerLabel} API streaming error`);
    }

    return {
      ok: response.ok,
      status: response.status,
      stream: response.body, // Return the readable stream
      contentType: response.headers.get("content-type"),
      headers: response.headers,
    };
  }

  // Non-streaming requests use retry logic
  return withRetry(async () => {
    const response = await send();
    const text = await response.text();
    logger.debug({
      provider: providerLabel,
      status: response.status,
      responseLength: text.length,
    }, `${providerLabel} API response`);

    // A non-JSON payload (e.g. an HTML error page) is reported as json: null;
    // the raw text is still returned so callers can inspect it.
    let json;
    try {
      json = JSON.parse(text);
    } catch {
      json = null;
    }

    const result = {
      ok: response.ok,
      status: response.status,
      json,
      text,
      contentType: response.headers.get("content-type"),
      headers: response.headers,
    };

    // Log errors for retry logic
    if (!response.ok) {
      logger.warn({
        provider: providerLabel,
        status: response.status,
        error: json?.error || text.substring(0, 200),
      }, `${providerLabel} API error`);
    }
    return result;
  }, {
    maxRetries: config.apiRetry?.maxRetries || 3,
    initialDelay: config.apiRetry?.initialDelay || 1000,
    maxDelay: config.apiRetry?.maxDelay || 30000,
  });
}
/**
 * Forward a request to the configured Databricks serving endpoint.
 *
 * Works on a shallow copy of `body`, injects STANDARD_TOOLS when the client
 * sent none, and converts Anthropic-format tools to the OpenAI function format
 * unless the first tool already has `type: "function"`.
 *
 * @param {object} body - Request payload; not mutated.
 * @returns {Promise<object>} Whatever performJsonRequest resolves to.
 * @throws {Error} When config.databricks.url is not set.
 */
async function invokeDatabricks(body) {
  const endpointUrl = config.databricks?.url;
  if (!endpointUrl) {
    throw new Error("Databricks configuration is missing required URL.");
  }

  // Shallow copy so the caller's object is left untouched.
  const requestBody = { ...body };

  // Passthrough mode: the client sent no tools, so supply the standard set.
  const clientSentTools = Array.isArray(requestBody.tools) && requestBody.tools.length > 0;
  if (!clientSentTools) {
    requestBody.tools = STANDARD_TOOLS;
    logger.debug({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOL_NAMES,
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Databricks) ===");
  }

  // Databricks expects OpenAI-format tools; the first entry tells us whether
  // conversion has already happened.
  const currentTools = requestBody.tools;
  if (Array.isArray(currentTools) && currentTools.length > 0) {
    if (currentTools[0]?.type === "function") {
      logger.debug({
        toolCount: currentTools.length,
        toolNames: currentTools.map((tool) => tool.function?.name),
      }, "Tools already in OpenAI format, skipping conversion");
    } else {
      const converted = convertAnthropicToolsToOpenRouter(currentTools);
      requestBody.tools = converted;
      logger.debug({
        convertedToolCount: converted.length,
        convertedToolNames: converted.map((tool) => tool.function?.name),
      }, "Converted tools to OpenAI format for Databricks");
    }
  }

  return performJsonRequest(
    config.databricks.url,
    {
      headers: {
        Authorization: `Bearer ${config.databricks.apiKey}`,
        "Content-Type": "application/json",
      },
      body: requestBody,
    },
    "Databricks"
  );
}
/**
 * Forward an Anthropic-format request to the configured Azure Anthropic endpoint.
 *
 * STANDARD_TOOLS are injected when the client sent no tools. The injection is
 * applied to a shallow copy so the caller's `body` is never modified (matching
 * invokeDatabricks).
 *
 * @param {object} body - Anthropic Messages API payload; not mutated.
 * @returns {Promise<object>} Whatever performJsonRequest resolves to.
 * @throws {Error} When config.azureAnthropic.endpoint is not set.
 */
async function invokeAzureAnthropic(body) {
  if (!config.azureAnthropic?.endpoint) {
    throw new Error("Azure Anthropic endpoint is not configured.");
  }

  // Create a copy of body to avoid mutating the original
  const azureBody = { ...body };

  // Inject standard tools if client didn't send any (passthrough mode)
  if (!Array.isArray(azureBody.tools) || azureBody.tools.length === 0) {
    azureBody.tools = STANDARD_TOOLS;
    logger.debug({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOL_NAMES,
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Azure Anthropic) ===");
  }

  const headers = {
    "Content-Type": "application/json",
    "x-api-key": config.azureAnthropic.apiKey,
    // Fall back to the fixed API version when none is configured.
    "anthropic-version": config.azureAnthropic.version ?? "2023-06-01",
  };

  return performJsonRequest(
    config.azureAnthropic.endpoint,
    { headers, body: azureBody },
    "Azure Anthropic",
  );
}
/**
 * Send a request to Ollama, using the Anthropic-compatible `/v1/messages`
 * endpoint when hasAnthropicEndpoint() reports it and the legacy `/api/chat`
 * endpoint otherwise.
 *
 * Tools are dropped when checkOllamaToolSupport() says the model cannot use
 * them; otherwise STANDARD_TOOLS are injected when the client sent none, unless
 * INJECT_TOOLS_OLLAMA is set to "false".
 *
 * @param {object} body - Anthropic-style payload. `_suggestionModeModel` and
 *   `_tierModel` override config.ollama.model, in that order.
 * @returns {Promise<object>} Whatever performJsonRequest resolves to.
 * @throws {Error} When config.ollama.endpoint is not set.
 */
async function invokeOllama(body) {
  if (!config.ollama?.endpoint) {
    throw new Error("Ollama endpoint is not configured.");
  }
  const { checkOllamaToolSupport, hasAnthropicEndpoint, convertAnthropicToolsToOllama } = require("./ollama-utils");
  const modelName = body._suggestionModeModel || body._tierModel || config.ollama.model;

  // Detect whether Ollama has the native Anthropic Messages API (v0.14.0+)
  const useAnthropicApi = await hasAnthropicEndpoint(config.ollama.endpoint);
  // Check if model supports tools FIRST (before wasteful injection)
  const supportsTools = await checkOllamaToolSupport(modelName);
  const injectToolsOllama = process.env.INJECT_TOOLS_OLLAMA !== "false";

  // Determine tools to send
  let toolsToSend = body.tools;
  let toolsInjected = false;
  if (!supportsTools) {
    toolsToSend = null;
  } else if (injectToolsOllama && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) {
    toolsToSend = STANDARD_TOOLS;
    toolsInjected = true;
  }
  // toolsToSend is null whenever the model lacks tool support, so this single
  // flag also covers the "supportsTools" condition.
  const hasTools = Array.isArray(toolsToSend) && toolsToSend.length > 0;
  const toolCount = hasTools ? toolsToSend.length : 0;

  // Consolidated tool injection log
  let logMessage;
  if (!supportsTools) {
    logMessage = `Tools not supported (0 tools)`;
  } else if (toolsInjected) {
    logMessage = `injected ${toolCount} tools`;
  } else if (hasTools) {
    logMessage = `Using client-provided tools (${toolCount} tools)`;
  } else if (!injectToolsOllama) {
    logMessage = `Tool injection disabled (0 tools)`;
  } else {
    logMessage = `No tools (0 tools)`;
  }
  // Name the model actually used for this request, which may be a tier or
  // suggestion-mode override rather than config.ollama.model.
  logger.debug({
    model: modelName,
    apiMode: useAnthropicApi ? "anthropic" : "legacy",
    toolCount,
    toolsInjected,
    supportsTools,
    toolNames: hasTools ? toolsToSend.map(t => t.name) : []
  }, `=== Ollama STANDARD TOOLS INJECTION for ${modelName} === ${logMessage}`);

  // Copy config.ollama.keepAlive onto the request; purely numeric strings are
  // sent as integers, anything else is passed through unchanged.
  const applyKeepAlive = (target) => {
    if (config.ollama.keepAlive === undefined) return;
    const keepAlive = config.ollama.keepAlive;
    target.keep_alive = /^-?\d+$/.test(keepAlive)
      ? parseInt(keepAlive, 10)
      : keepAlive;
    logger.debug({ keepAlive: target.keep_alive }, "Ollama keep_alive configured");
  };

  // Collapse an array of Anthropic content blocks into newline-joined text;
  // any other value is returned as-is.
  const flattenTextBlocks = (content) =>
    Array.isArray(content)
      ? content
          .filter(block => block?.type === 'text')
          .map(block => block.text || '')
          .join('\n')
      : content;

  // ---- Anthropic-native path (Ollama v0.14.0+) ----
  if (useAnthropicApi) {
    const endpoint = `${config.ollama.endpoint}/v1/messages`;
    const headers = {
      "Content-Type": "application/json",
      "anthropic-version": "2023-06-01",
    };
    // Build body with only valid Anthropic Messages API fields
    const ollamaBody = {
      model: modelName,
      messages: body.messages,
      max_tokens: body.max_tokens || 16384,
      stream: body.stream ?? false,
    };
    if (body.system) ollamaBody.system = body.system;
    if (body.temperature !== undefined) ollamaBody.temperature = body.temperature;
    if (body.top_p !== undefined) ollamaBody.top_p = body.top_p;
    if (body.top_k !== undefined) ollamaBody.top_k = body.top_k;
    if (body.stop_sequences) ollamaBody.stop_sequences = body.stop_sequences;
    if (body.tool_choice) ollamaBody.tool_choice = body.tool_choice;
    if (body.metadata) ollamaBody.metadata = body.metadata;
    // Tools (already Anthropic format — no conversion needed)
    if (hasTools) {
      ollamaBody.tools = toolsToSend;
    }
    applyKeepAlive(ollamaBody);
    return performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama");
  }

  // ---- Legacy path (Ollama < v0.14.0, /api/chat with OpenAI format) ----
  const endpoint = `${config.ollama.endpoint}/api/chat`;
  const headers = { "Content-Type": "application/json" };

  // Convert Anthropic messages to Ollama format (content blocks → strings).
  // The system prompt may be a plain string or an array of text blocks; both
  // forms are flattened so an array-form prompt is not silently dropped.
  const convertedMessages = [];
  const systemText = flattenTextBlocks(body.system);
  if (typeof systemText === "string" && systemText.trim().length > 0) {
    convertedMessages.push({ role: "system", content: systemText.trim() });
  }
  for (const msg of body.messages || []) {
    convertedMessages.push({ role: msg.role, content: flattenTextBlocks(msg.content) || '' });
  }

  // Deduplicate consecutive messages with same role (the later one is dropped)
  const deduplicated = [];
  let lastRole = null;
  for (const msg of convertedMessages) {
    if (msg.role === lastRole) {
      logger.debug({
        skippedRole: msg.role,
        contentPreview: msg.content.substring(0, 50)
      }, 'Ollama: Skipping duplicate consecutive message with same role');
      continue;
    }
    deduplicated.push(msg);
    lastRole = msg.role;
  }

  const ollamaBody = {
    model: modelName,
    messages: deduplicated,
    stream: body.stream ?? false,
    options: {
      temperature: body.temperature ?? 0.7,
      num_predict: body.max_tokens ?? 16384,
      top_p: body.top_p ?? 1.0,
    },
  };
  applyKeepAlive(ollamaBody);
  // Tools need conversion to OpenAI function-calling format for legacy endpoint
  if (hasTools) {
    ollamaBody.tools = convertAnthropicToolsToOllama(toolsToSend);
  }
  return performJsonRequest(endpoint, { headers, body: ollamaBody }, "Ollama");
}
/**
 * Translate an Anthropic-style request into an OpenAI-style chat completion
 * and send it to OpenRouter.
 *
 * The `system` field becomes a leading system message, and STANDARD_TOOLS are
 * injected when the client supplied no tools.
 *
 * @param {object} body - Anthropic-style payload. `_suggestionModeModel` and
 *   `_tierModel` override config.openrouter.model, in that order.
 * @returns {Promise<object>} Whatever performJsonRequest resolves to.
 * @throws {Error} When the OpenRouter endpoint or API key is not configured.
 */
async function invokeOpenRouter(body) {
  const isConfigured = Boolean(config.openrouter?.endpoint && config.openrouter?.apiKey);
  if (!isConfigured) {
    throw new Error("OpenRouter endpoint or API key is not configured.");
  }
  const {
    convertAnthropicToolsToOpenRouter: toOpenAITools,
    convertAnthropicMessagesToOpenRouter: toOpenAIMessages
  } = require("./openrouter-utils");

  // Anthropic carries the system prompt in its own field; OpenAI-style APIs
  // expect it as the first message.
  const chatMessages = toOpenAIMessages(body.messages || []);
  if (body.system) {
    chatMessages.unshift({ role: "system", content: body.system });
  }

  const payload = {
    model: body._suggestionModeModel || body._tierModel || config.openrouter.model,
    messages: chatMessages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 16384,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // No client tools usually means passthrough mode, so fall back to the
  // standard Claude Code tool set.
  const clientTools = body.tools;
  const toolsInjected = !Array.isArray(clientTools) || clientTools.length === 0;
  const toolsToSend = toolsInjected ? STANDARD_TOOLS : clientTools;
  if (toolsInjected) {
    logger.debug({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOL_NAMES,
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (OpenRouter) ===");
  }

  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    payload.tools = toOpenAITools(toolsToSend);
    logger.debug({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map((tool) => tool.name),
      toolsInjected
    }, "Sending tools to OpenRouter");
  }

  const requestHeaders = {
    "Authorization": `Bearer ${config.openrouter.apiKey}`,
    "Content-Type": "application/json",
    "HTTP-Referer": "https://localhost:8080",
    "X-Title": "Claude-Ollama-Proxy"
  };
  return performJsonRequest(
    config.openrouter.endpoint,
    { headers: requestHeaders, body: payload },
    "OpenRouter"
  );
}
/**
 * Classify an Azure OpenAI endpoint URL by the path fragment it contains.
 *
 * Fragments are checked in a fixed order, so a URL matching several of them is
 * classified by the first match.
 *
 * @param {string} url - Configured Azure OpenAI endpoint URL.
 * @returns {"responses"|"models"|"deployments"} The detected API format.
 * @throws {Error} When none of the known fragments is present.
 */
function detectAzureFormat(url) {
  const knownFormats = [
    ["/openai/responses", "responses"],
    ["/models/", "models"],
    ["/openai/deployments", "deployments"],
  ];
  const found = knownFormats.find(([fragment]) => url.includes(fragment));
  if (found === undefined) {
    throw new Error("Unknown Azure OpenAI endpoint");
  }
  return found[1];
}
/**
 * Send an Anthropic-style request to Azure OpenAI.
 *
 * The endpoint URL decides the wire format (see detectAzureFormat):
 *  - "deployments" / "models": a Chat Completions payload is posted as-is and
 *    the raw performJsonRequest result is returned.
 *  - "responses": the payload is rewritten for the Responses API (`input`
 *    items, flat tool definitions, `max_output_tokens`), and a successful
 *    reply is converted to Chat Completions shape and then to Anthropic shape.
 *
 * Requests are always sent with `stream: false`, and the token limit is capped
 * at 16384. STANDARD_TOOLS are injected when the client sent no tools.
 *
 * @param {object} body - Anthropic-style payload. `_suggestionModeModel` and
 *   `_tierModel` override config.azureOpenAI.deployment, in that order.
 * @returns {Promise<object>} A performJsonRequest-style result object.
 * @throws {Error} When the endpoint or API key is missing, or the endpoint
 *   format is not recognised.
 */
async function invokeAzureOpenAI(body) {
  if (!config.azureOpenAI?.endpoint || !config.azureOpenAI?.apiKey) {
    throw new Error("Azure OpenAI endpoint or API key is not configured.");
  }
  const {
    convertAnthropicToolsToOpenRouter,
    convertAnthropicMessagesToOpenRouter
  } = require("./openrouter-utils");

  // Azure OpenAI URL format
  const endpoint = config.azureOpenAI.endpoint;
  const format = detectAzureFormat(endpoint);
  const headers = {
    "Content-Type": "application/json"
  };
  // Azure AI Foundry (services.ai.azure.com) uses Bearer auth
  // Standard Azure OpenAI (openai.azure.com) uses api-key header
  if (endpoint.includes("services.ai.azure.com")) {
    headers["Authorization"] = `Bearer ${config.azureOpenAI.apiKey}`;
  } else {
    headers["api-key"] = config.azureOpenAI.apiKey;
  }

  // Convert messages and handle system message
  const messages = convertAnthropicMessagesToOpenRouter(body.messages || []);
  // Anthropic uses separate 'system' field, OpenAI needs it as first message
  if (body.system) {
    messages.unshift({
      role: "system",
      content: body.system
    });
  }

  // System prompt injection disabled - breaks model response
  // Tool guidance now provided via tool descriptions instead
  const azureDeployment = body._suggestionModeModel || body._tierModel || config.azureOpenAI.deployment || "";
  const isGpt5 = /gpt-5/i.test(azureDeployment);
  const maxTokensKey = isGpt5 ? "max_completion_tokens" : "max_tokens";
  // Keep the resolved limit in a variable: the Chat Completions key name varies
  // by model, so reading it back as `azureBody.max_tokens` yields undefined for
  // gpt-5 deployments.
  const maxTokens = Math.min(body.max_tokens ?? 16384, 16384);
  const azureBody = {
    messages,
    temperature: body.temperature ?? 0.3,
    [maxTokensKey]: maxTokens,
    top_p: body.top_p ?? 1.0,
    stream: false,
    model: azureDeployment
  };

  // Add tools - inject standard tools if client didn't send any (passthrough mode)
  let toolsToSend = body.tools;
  let toolsInjected = false;
  if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
    // Client didn't send tools (likely passthrough mode) - inject standard Claude Code tools
    toolsToSend = STANDARD_TOOLS;
    toolsInjected = true;
    logger.debug({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOL_NAMES,
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS ===");
  }
  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    azureBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend);
    azureBody.parallel_tool_calls = true; // Enable parallel tool calls
    azureBody.tool_choice = "auto"; // Explicitly enable tool use (helps GPT models understand they should use tools)
    logger.debug({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map(t => t.name),
      toolsInjected,
      hasSystemMessage: !!body.system,
      messageCount: messages.length,
      temperature: azureBody.temperature,
      sampleTool: azureBody.tools[0] // Log first tool for inspection
    }, "=== SENDING TOOLS TO AZURE OPENAI ===");
  }

  logger.debug({
    endpoint,
    hasTools: !!azureBody.tools,
    toolCount: azureBody.tools?.length || 0,
    temperature: azureBody.temperature,
    max_tokens: maxTokens,
    tool_choice: azureBody.tool_choice
  }, "=== AZURE OPENAI REQUEST ===");

  if (format === "deployments" || format === "models") {
    return performJsonRequest(endpoint, { headers, body: azureBody }, "Azure OpenAI");
  }
  else if (format === "responses") {
    // Responses API uses 'input' instead of 'messages' and flat tool format
    // Convert tools from Chat Completions format to Responses API format
    const responsesTools = azureBody.tools?.map(tool => {
      if (tool.type === "function" && tool.function) {
        // Flatten: {type:"function", function:{name,description,parameters}} -> {type:"function", name, description, parameters}
        return {
          type: "function",
          name: tool.function.name,
          description: tool.function.description,
          parameters: tool.function.parameters
        };
      }
      return tool;
    });

    // Convert messages to Responses API input format
    // Responses API uses different structure for tool calls and results
    const responsesInput = [];
    // Track function call IDs for matching with outputs
    const pendingCallIds = [];

    // Detect if this is a continuation request (has tool results)
    // Azure content filter triggers on full system prompt in continuations
    // Check for:
    // 1. tool_result blocks in user messages (Anthropic format)
    // 2. tool messages (OpenAI format)
    // 3. assistant messages with tool_use or tool_calls (indicates prior tool invocation)
    // 4. Flattened continuation pattern from orchestrator (contains "IMPORTANT: Focus on")
    const hasToolResults = (body.messages || []).some(msg => {
      // Check for Anthropic format tool_result in user messages
      if (msg.role === "user" && Array.isArray(msg.content)) {
        if (msg.content.some(block => block.type === "tool_result")) return true;
      }
      // Check for OpenAI format tool messages
      if (msg.role === "tool") return true;
      // Check for assistant messages with tool_use (Anthropic) or tool_calls (OpenAI)
      // If there's a prior tool use, this is a continuation
      if (msg.role === "assistant") {
        if (Array.isArray(msg.content)) {
          if (msg.content.some(block => block.type === "tool_use")) return true;
        }
        if (msg.tool_calls && msg.tool_calls.length > 0) return true;
      }
      return false;
    }) || azureBody.messages.some(msg => {
      // Also check converted messages for flattened continuation pattern
      // The orchestrator flattens tool results into user message with this marker
      if (msg.role === "user" && typeof msg.content === "string") {
        if (msg.content.includes("IMPORTANT: Focus on and respond ONLY to my most recent request")) return true;
      }
      return false;
    });

    if (hasToolResults) {
      logger.debug({
        hasToolResults: true,
        originalMessageCount: (body.messages || []).length,
        convertedMessageCount: azureBody.messages.length,
        messageRoles: (body.messages || []).map(m => m.role),
      }, "=== CONTINUATION REQUEST DETECTED - using minimal system prompt to avoid Azure content filter ===");
    } else {
      logger.debug({
        hasToolResults: false,
        originalMessageCount: (body.messages || []).length,
        messageRoles: (body.messages || []).map(m => m.role),
      }, "Initial request - using full system prompt");
    }

    // Helper function to strip <system-reminder> tags and meta-instructions from content
    // Azure's jailbreak filter triggers on these instructions in continuation requests
    const stripSystemReminders = (content) => {
      if (!content || typeof content !== 'string') return content;
      // Remove <system-reminder>...</system-reminder> blocks
      let cleaned = content.replace(/<system-reminder>[\s\S]*?<\/system-reminder>/gi, '');
      // Remove the continuation marker that orchestrator adds
      cleaned = cleaned.replace(/---\s*IMPORTANT:\s*Focus on and respond ONLY to my most recent request[^\n]*/gi, '');
      // Trim whitespace
      return cleaned.trim();
    };

    for (const msg of azureBody.messages) {
      if (msg.role === "system") {
        // For continuation requests, use minimal system prompt to avoid content filter
        // Azure's jailbreak detection triggers on security-related text in continuations
        if (hasToolResults) {
          responsesInput.push({
            type: "message",
            role: "developer",
            content: "You are a helpful coding assistant. Continue helping the user based on the tool results."
          });
        } else {
          // Initial request - use full system prompt
          responsesInput.push({
            type: "message",
            role: "developer",
            content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
          });
        }
      } else if (msg.role === "user") {
        // Check if content contains tool_result blocks (Anthropic format)
        if (Array.isArray(msg.content)) {
          for (const block of msg.content) {
            if (block.type === "tool_result") {
              // Convert tool_result to function_call_output
              // Use tool_use_id if available, otherwise pop from pending call IDs
              const callId = block.tool_use_id || pendingCallIds.shift() || `call_${Date.now()}`;
              responsesInput.push({
                type: "function_call_output",
                call_id: callId,
                output: typeof block.content === 'string' ? block.content : JSON.stringify(block.content || "")
              });
            } else if (block.type === "text") {
              // For continuation requests, strip system-reminder tags to avoid jailbreak filter
              const textContent = hasToolResults ? stripSystemReminders(block.text || "") : (block.text || "");
              if (textContent) { // Only add if there's content after stripping
                responsesInput.push({
                  type: "message",
                  role: "user",
                  content: textContent
                });
              }
            }
          }
        } else {
          // For continuation requests, strip system-reminder tags to avoid jailbreak filter
          let userContent = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content);
          if (hasToolResults) {
            userContent = stripSystemReminders(userContent);
          }
          if (userContent) { // Only add if there's content after stripping
            responsesInput.push({
              type: "message",
              role: "user",
              content: userContent
            });
          }
        }
      } else if (msg.role === "assistant") {
        // Assistant messages - handle tool_calls (OpenAI format) and tool_use blocks (Anthropic format)
        if (msg.tool_calls && msg.tool_calls.length > 0) {
          // OpenAI format: tool_calls array
          for (const tc of msg.tool_calls) {
            const callId = tc.id || `call_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
            pendingCallIds.push(callId);
            responsesInput.push({
              type: "function_call",
              call_id: callId,
              name: tc.function?.name || tc.name,
              arguments: typeof tc.function?.arguments === 'string' ? tc.function.arguments : JSON.stringify(tc.function?.arguments || {})
            });
          }
        }
        // Handle content - could be string, array with tool_use blocks, or array with text blocks
        if (Array.isArray(msg.content)) {
          // Anthropic format: content is array of blocks
          for (const block of msg.content) {
            if (block.type === "tool_use") {
              const callId = block.id || `call_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
              pendingCallIds.push(callId);
              responsesInput.push({
                type: "function_call",
                call_id: callId,
                name: block.name,
                arguments: typeof block.input === 'string' ? block.input : JSON.stringify(block.input || {})
              });
            } else if (block.type === "text" && block.text) {
              responsesInput.push({
                type: "message",
                role: "assistant",
                content: block.text
              });
            }
          }
        } else if (msg.content) {
          // String content
          responsesInput.push({
            type: "message",
            role: "assistant",
            content: msg.content
          });
        }
      } else if (msg.role === "tool") {
        // Tool results become function_call_output
        // Use tool_call_id if available, otherwise pop from pending call IDs
        const callId = msg.tool_call_id || pendingCallIds.shift() || `call_${Date.now()}`;
        responsesInput.push({
          type: "function_call_output",
          call_id: callId,
          output: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content)
        });
      }
    }

    const responsesBody = {
      input: responsesInput,
      model: azureBody.model,
      // Use the resolved limit so gpt-5 deployments are capped too.
      max_output_tokens: maxTokens,
      tools: responsesTools,
      tool_choice: azureBody.tool_choice,
      stream: false
    };
    logger.debug({
      format: "responses",
      inputCount: responsesBody.input?.length,
      model: responsesBody.model,
      hasTools: !!responsesBody.tools
    }, "Using Responses API format");

    const result = await performJsonRequest(endpoint, { headers, body: responsesBody }, "Azure OpenAI Responses");

    // Convert Responses API response to Chat Completions format
    if (result.ok && result.json?.output) {
      const outputArray = result.json.output || [];
      // Find message output (contains text content)
      const messageOutput = outputArray.find(o => o.type === "message");
      const textContent = messageOutput?.content?.find(c => c.type === "output_text")?.text || "";
      // Find function_call outputs (tool calls are separate items in output array)
      const rawToolCalls = outputArray
        .filter(o => o.type === "function_call")
        .map(tc => ({
          id: tc.call_id || tc.id || `call_${Date.now()}`,
          type: "function",
          function: {
            name: tc.name,
            arguments: typeof tc.arguments === 'string' ? tc.arguments : JSON.stringify(tc.arguments || {})
          }
        }));

      // Deduplicate identical tool calls (GPT sometimes returns multiple identical calls)
      const seenSignatures = new Set();
      const toolCalls = rawToolCalls.filter(tc => {
        const signature = `${tc.function.name}:${tc.function.arguments}`;
        if (seenSignatures.has(signature)) {
          logger.warn({
            toolName: tc.function.name,
            signature: signature.substring(0, 100),
          }, "Filtered duplicate tool call from GPT response");
          return false;
        }
        seenSignatures.add(signature);
        return true;
      });
      if (rawToolCalls.length !== toolCalls.length) {
        logger.debug({
          originalCount: rawToolCalls.length,
          dedupedCount: toolCalls.length,
          removed: rawToolCalls.length - toolCalls.length,
        }, "Deduplicated identical tool calls from single response");
      }
      logger.debug({
        outputTypes: outputArray.map(o => o.type),
        hasMessage: !!messageOutput,
        toolCallCount: toolCalls.length,
        toolCallNames: toolCalls.map(tc => tc.function.name)
      }, "Parsing Responses API output");

      // Convert to Chat Completions format
      result.json = {
        id: result.json.id,
        object: "chat.completion",
        created: result.json.created_at,
        model: result.json.model,
        choices: [{
          index: 0,
          message: {
            role: "assistant",
            content: textContent,
            tool_calls: toolCalls.length > 0 ? toolCalls : undefined
          },
          finish_reason: toolCalls.length > 0 ? "tool_calls" : "stop"
        }],
        usage: result.json.usage
      };
      logger.debug({
        convertedContent: textContent?.substring(0, 100),
        hasToolCalls: toolCalls.length > 0,
        toolCallCount: toolCalls.length
      }, "Converted Responses API to Chat Completions format");

      // Now convert from Chat Completions format to Anthropic format
      const anthropicJson = convertOpenAIToAnthropic(result.json);
      logger.debug({
        anthropicContentTypes: anthropicJson.content?.map(c => c.type),
        stopReason: anthropicJson.stop_reason
      }, "Converted to Anthropic format");
      return {
        ok: result.ok,
        status: result.status,
        json: anthropicJson,
        text: JSON.stringify(anthropicJson),
        contentType: "application/json",
        headers: result.headers,
      };
    }
    // Error responses, and replies without an `output` array, are returned untouched.
    return result;
  }
  else {
    throw new Error(`Unsupported Azure OpenAI endpoint format: ${format}`);
  }
}
/**
 * Translate an Anthropic-style request into an OpenAI Chat Completions call.
 *
 * The `system` field becomes a leading system message, STANDARD_TOOLS are
 * injected when the client supplied none, and parallel tool calls are turned
 * off whenever tools are sent.
 *
 * @param {object} body - Anthropic-style payload. `_suggestionModeModel` and
 *   `_tierModel` override config.openai.model (default "gpt-4o"), in that order.
 * @returns {Promise<object>} Whatever performJsonRequest resolves to.
 * @throws {Error} When config.openai.apiKey is not set.
 */
async function invokeOpenAI(body) {
  if (!config.openai?.apiKey) {
    throw new Error("OpenAI API key is not configured.");
  }
  const {
    convertAnthropicToolsToOpenRouter: toOpenAITools,
    convertAnthropicMessagesToOpenRouter: toOpenAIMessages
  } = require("./openrouter-utils");

  const targetUrl = config.openai.endpoint || "https://api.openai.com/v1/chat/completions";
  const requestHeaders = {
    "Authorization": `Bearer ${config.openai.apiKey}`,
    "Content-Type": "application/json",
    // Organization header is only sent when one is configured.
    ...(config.openai.organization
      ? { "OpenAI-Organization": config.openai.organization }
      : {}),
  };

  // Anthropic keeps the system prompt in its own field; OpenAI wants it as the
  // first message.
  const chatMessages = toOpenAIMessages(body.messages || []);
  if (body.system) {
    chatMessages.unshift({ role: "system", content: body.system });
  }

  // System prompt injection disabled - breaks model response
  const payload = {
    model: body._suggestionModeModel || body._tierModel || config.openai.model || "gpt-4o",
    messages: chatMessages,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 16384,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // No client tools usually means passthrough mode, so fall back to the
  // standard Claude Code tool set.
  const clientTools = body.tools;
  const toolsInjected = !Array.isArray(clientTools) || clientTools.length === 0;
  const toolsToSend = toolsInjected ? STANDARD_TOOLS : clientTools;
  if (toolsInjected) {
    logger.debug({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOL_NAMES,
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (OpenAI) ===");
  }

  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    payload.tools = toOpenAITools(toolsToSend);
    // Parallel tool calls stay off because GPT often makes duplicate calls.
    payload.parallel_tool_calls = false;
    // Let the model decide when to use tools.
    payload.tool_choice = "auto";
    logger.debug({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map((tool) => tool.name),
      toolsInjected
    }, "=== SENDING TOOLS TO OPENAI ===");
  }

  logger.debug({
    endpoint: targetUrl,
    model: payload.model,
    hasTools: !!payload.tools,
    toolCount: payload.tools?.length || 0,
    temperature: payload.temperature,
    max_tokens: payload.max_tokens,
  }, "=== OPENAI REQUEST ===");

  return performJsonRequest(targetUrl, { headers: requestHeaders, body: payload }, "OpenAI");
}
/**
 * Translate an Anthropic-style request into an OpenAI-compatible chat
 * completion and send it to a llama.cpp server.
 *
 * Consecutive messages with the same role are collapsed to the first one, and
 * STANDARD_TOOLS are injected when the client sent no tools unless
 * INJECT_TOOLS_LLAMACPP is set to "false".
 *
 * @param {object} body - Anthropic-style payload.
 * @returns {Promise<object>} Whatever performJsonRequest resolves to.
 * @throws {Error} When config.llamacpp.endpoint is not set.
 */
async function invokeLlamaCpp(body) {
  if (!config.llamacpp?.endpoint) {
    throw new Error("llama.cpp endpoint is not configured.");
  }
  const {
    convertAnthropicToolsToOpenRouter,
    convertAnthropicMessagesToOpenRouter
  } = require("./openrouter-utils");

  const endpoint = `${config.llamacpp.endpoint}/v1/chat/completions`;
  const headers = {
    "Content-Type": "application/json",
  };
  // Add API key if configured (for secured llama.cpp servers)
  if (config.llamacpp.apiKey) {
    headers["Authorization"] = `Bearer ${config.llamacpp.apiKey}`;
  }

  // Short, log-safe preview of a message's content. JSON.stringify returns
  // undefined for undefined input (e.g. an assistant message that only carries
  // tool_calls), so fall back to an empty string instead of throwing.
  const previewContent = (content, limit) =>
    typeof content === 'string'
      ? content.substring(0, limit)
      : (JSON.stringify(content) ?? '').substring(0, limit);

  // Convert messages to OpenAI format
  const messages = convertAnthropicMessagesToOpenRouter(body.messages || []);
  // Handle system message
  if (body.system) {
    messages.unshift({ role: "system", content: body.system });
  }

  // FIX: Deduplicate consecutive messages with same role (llama.cpp rejects this)
  // NOTE(review): this also drops the second of two consecutive `tool`
  // messages - confirm that is intended for parallel tool results.
  const deduplicated = [];
  let lastRole = null;
  for (const msg of messages) {
    if (msg.role === lastRole) {
      logger.debug({
        skippedRole: msg.role,
        contentPreview: previewContent(msg.content, 50)
      }, 'llama.cpp: Skipping duplicate consecutive message with same role');
      continue;
    }
    deduplicated.push(msg);
    lastRole = msg.role;
  }
  if (deduplicated.length !== messages.length) {
    logger.debug({
      originalCount: messages.length,
      deduplicatedCount: deduplicated.length,
      removed: messages.length - deduplicated.length,
      messageRoles: messages.map(m => m.role).join(' → '),
      deduplicatedRoles: deduplicated.map(m => m.role).join(' → ')
    }, 'llama.cpp: Removed consecutive duplicate roles from message sequence');
  }

  const llamacppBody = {
    messages: deduplicated,
    temperature: body.temperature ?? 0.7,
    max_tokens: body.max_tokens ?? 16384,
    top_p: body.top_p ?? 1.0,
    stream: body.stream ?? false
  };

  // Inject standard tools if client didn't send any
  let toolsToSend = body.tools;
  let toolsInjected = false;
  const injectToolsLlamacpp = process.env.INJECT_TOOLS_LLAMACPP !== "false";
  if (injectToolsLlamacpp && (!Array.isArray(toolsToSend) || toolsToSend.length === 0)) {
    toolsToSend = STANDARD_TOOLS;
    toolsInjected = true;
    logger.debug({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOL_NAMES,
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (llama.cpp) ===");
  } else if (!injectToolsLlamacpp) {
    logger.debug({}, "Tool injection disabled for llama.cpp (INJECT_TOOLS_LLAMACPP=false)");
  }
  if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
    llamacppBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend);
    llamacppBody.tool_choice = "auto";
    logger.debug({
      toolCount: toolsToSend.length,
      toolNames: toolsToSend.map(t => t.name),
      toolsInjected
    }, "=== SENDING TOOLS TO LLAMA.CPP ===");
  }

  logger.debug({
    endpoint,
    hasTools: !!llamacppBody.tools,
    toolCount: llamacppBody.tools?.length || 0,
    temperature: llamacppBody.temperature,
    max_tokens: llamacppBody.max_tokens,
    messageCount: llamacppBody.messages?.length || 0,
    messageRoles: llamacppBody.messages?.map(m => m.role).join(' → '),
    messages: llamacppBody.messages?.map((m, i) => ({
      index: i,
      role: m.role,
      hasContent: !!m.content,
      contentPreview: previewContent(m.content, 100),
      hasToolCalls: !!m.tool_calls,
      toolCallCount: m.tool_calls?.length || 0,
    }))
  }, "=== LLAMA.CPP REQUEST ===");

  return performJsonRequest(endpoint, { headers, body: llamacppBody }, "llama.cpp");
}
async function invokeLMStudio(body) {
if (!config.lmstudio?.endpoint) {
throw new Error("LM Studio endpoint is not configured.");
}
const {
convertAnthropicToolsToOpenRouter,
convertAnthropicMessagesToOpenRouter
} = require("./openrouter-utils");
const endpoint = `${config.lmstudio.endpoint}/v1/chat/completions`;
const headers = {
"Content-Type": "application/json",
};
// Add API key if configured (for secured LM Studio servers)
if (config.lmstudio.apiKey) {
headers["Authorization"] = `Bearer ${config.lmstudio.apiKey}`;
}
// Convert messages to OpenAI format
const messages = convertAnthropicMessagesToOpenRouter(body.messages || []);
// Handle system message
if (body.system) {
messages.unshift({ role: "system", content: body.system });
}
const lmstudioBody = {
messages,
temperature: body.temperature ?? 0.7,
max_tokens: body.max_tokens ?? 16384,
top_p: body.top_p ?? 1.0,
stream: body.stream ?? false
};
// Inject standard tools if client didn't send any
let toolsToSend = body.tools;
let toolsInjected = false;
if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
toolsToSend = STANDARD_TOOLS;
toolsInjected = true;
logger.debug({
injectedToolCount: STANDARD_TOOLS.length,
injectedToolNames: STANDARD_TOOL_NAMES,
reason: "Client did not send tools (passthrough mode)"
}, "=== INJECTING STANDARD TOOLS (LM Studio) ===");
}
if (Array.isArray(toolsToSend) && toolsToSend.length > 0) {
lmstudioBody.tools = convertAnthropicToolsToOpenRouter(toolsToSend);
lmstudioBody.tool_choice = "auto";
logger.debug({
toolCount: toolsToSend.length,
toolNames: toolsToSend.map(t => t.name),
toolsInjected
}, "=== SENDING TOOLS TO LM STUDIO ===");
}
logger.debug({
endpoint,
hasTools: !!lmstudioBody.tools,
toolCount: lmstudioBody.tools?.length || 0,
temperature: lmstudioBody.temperature,
max_tokens: lmstudioBody.max_tokens,
}, "=== LM STUDIO REQUEST ===");
return performJsonRequest(endpoint, { headers, body: lmstudioBody }, "LM Studio");
}
/**
 * Send an Anthropic-style request to AWS Bedrock through the Converse API
 * using Bearer-token authentication, and translate the reply back into an
 * Anthropic-format message.
 *
 * @param {object} body - Anthropic-format request. Reads `messages`, `system`
 *   (string or array of text blocks), `tools`, `max_tokens`, `temperature`,
 *   `top_p`, `stream` (logged only) and `_tierModel` (overrides
 *   `config.bedrock.modelId`).
 * @returns {Promise<{ok: boolean, status: number, json: object, actualProvider: string, modelFamily: *}>}
 *   Anthropic-format response wrapped in the provider result envelope.
 * @throws {Error} When the Bedrock API key is missing, when the Converse call
 *   returns a non-OK response, or when the response lacks
 *   `output.message.content`. Errors from `performJsonRequest` are logged and
 *   rethrown unchanged.
 */
async function invokeBedrock(body) {
  // 1. Validate Bearer token
  if (!config.bedrock?.apiKey) {
    throw new Error(
      "AWS Bedrock requires AWS_BEDROCK_API_KEY (Bearer token). " +
      "Generate from AWS Console → Bedrock → API Keys, then set AWS_BEDROCK_API_KEY in your .env file."
    );
  }
  const bearerToken = config.bedrock.apiKey;
  logger.debug({ authMethod: "Bearer Token" }, "=== BEDROCK AUTH ===");

  // Collapses a string or an array of text-like parts into one string.
  const flattenText = (value) => {
    if (typeof value === "string") {
      return value;
    }
    if (!Array.isArray(value)) {
      return "";
    }
    return value
      .map(part => {
        if (typeof part === "string") return part;
        if (typeof part?.text === "string") return part.text;
        return typeof part?.content === "string" ? part.content : "";
      })
      .join("\n");
  };

  // Converts Anthropic tool_result content (string or block array) into
  // Converse tool-result content blocks; non-text parts are serialized as JSON text.
  const toToolResultContent = (content) => {
    if (typeof content === "string") {
      return [{ text: content }];
    }
    if (!Array.isArray(content) || content.length === 0) {
      return [{ text: "" }];
    }
    return content.map(part => {
      if (typeof part === "string") return { text: part };
      if (typeof part?.text === "string") return { text: part.text };
      return { text: JSON.stringify(part) ?? "" };
    });
  };

  // Converts one Anthropic content block into a Converse content block.
  // tool_use / tool_result must keep their structure, otherwise the model
  // never sees its own tool calls or their results on follow-up turns.
  const toConverseBlock = (block) => {
    if (typeof block === "string") {
      return { text: block };
    }
    if (block?.type === "tool_use") {
      return {
        toolUse: {
          toolUseId: block.id,
          name: block.name,
          input: block.input ?? {}
        }
      };
    }
    if (block?.type === "tool_result") {
      return {
        toolResult: {
          toolUseId: block.tool_use_id,
          content: toToolResultContent(block.content)
        }
      };
    }
    return { text: block?.text || (typeof block?.content === "string" ? block.content : "") };
  };

  // 2. Inject standard tools if needed
  let toolsToSend = body.tools;
  let toolsInjected = false;
  if (!Array.isArray(toolsToSend) || toolsToSend.length === 0) {
    toolsToSend = STANDARD_TOOLS;
    toolsInjected = true;
    logger.debug({
      injectedToolCount: STANDARD_TOOLS.length,
      injectedToolNames: STANDARD_TOOL_NAMES,
      toolsInjected,
      reason: "Client did not send tools (passthrough mode)"
    }, "=== INJECTING STANDARD TOOLS (Bedrock) ===");
  }
  const bedrockBody = { ...body, tools: toolsToSend };

  // 3. Detect model family
  const modelId = body._tierModel || config.bedrock.modelId;
  const modelFamily = detectModelFamily(modelId);
  logger.debug({
    modelId,
    modelFamily,
    hasTools: !!bedrockBody.tools,
    toolCount: bedrockBody.tools?.length || 0,
    streaming: body.stream || false,
  }, "=== BEDROCK REQUEST (FETCH) ===");

  // 4. Convert to Bedrock Converse API format (simpler, more universal).
  // The Converse API only allows 'user' and 'assistant' roles in the messages
  // array, so system messages are pulled out and sent via the `system` field.
  const incomingMessages = Array.isArray(bedrockBody.messages) ? bedrockBody.messages : [];
  const systemMessages = incomingMessages.filter(msg => msg.role === 'system');
  const converseBody = {
    messages: incomingMessages
      .filter(msg => msg.role !== 'system')
      .map(msg => ({
        role: msg.role,
        content: Array.isArray(msg.content)
          ? msg.content.map(block => toConverseBlock(block))
          : [{ text: msg.content ?? "" }]
      }))
  };

  // Add system prompt (from the Anthropic system field, else the first
  // system message found in the messages array).
  let systemText = flattenText(bedrockBody.system);
  if (!systemText && systemMessages.length > 0) {
    systemText = flattenText(systemMessages[0].content);
  }
  if (systemText) {
    converseBody.system = [{ text: systemText }];
  }

  // Add inference config; each setting is forwarded independently so that
  // temperature/top_p are not lost when max_tokens is absent.
  const inferenceConfig = {};
  if (bedrockBody.max_tokens) {
    inferenceConfig.maxTokens = bedrockBody.max_tokens;
  }
  if (bedrockBody.temperature != null) {
    inferenceConfig.temperature = bedrockBody.temperature;
  }
  if (bedrockBody.top_p != null) {
    inferenceConfig.topP = bedrockBody.top_p;
  }
  if (Object.keys(inferenceConfig).length > 0) {
    converseBody.inferenceConfig = inferenceConfig;
  }

  // Add tools if present
  if (bedrockBody.tools && bedrockBody.tools.length > 0) {
    converseBody.toolConfig = {
      tools: bedrockBody.tools.map(tool => ({
        toolSpec: {
          name: tool.name,
          description: tool.description,
          inputSchema: {
            json: tool.input_schema
          }
        }
      }))
    };
  }

  // 5. Construct Bedrock Converse API endpoint
  const path = `/model/${modelId}/converse`;
  const host = `bedrock-runtime.${config.bedrock.region}.amazonaws.com`;
  const endpoint = `https://${host}${path}`;
  logger.debug({
    endpoint,
    authMethod: "Bearer Token",
    hasSystem: !!converseBody.system,
    hasTools: !!converseBody.toolConfig,
    messageCount: converseBody.messages.length
  }, "=== BEDROCK CONVERSE API REQUEST ===");

  // 6. Prepare request headers with Bearer token
  const requestHeaders = {
    "Content-Type": "application/json",
    "Authorization": `Bearer ${bearerToken}`
  };

  // Converse stop reasons that are passed through unchanged; anything else
  // is reported as "end_turn".
  const passthroughStopReasons = new Set(["end_turn", "tool_use", "max_tokens"]);

  // 7. Make the Converse API request
  try {
    const response = await performJsonRequest(endpoint, {
      headers: requestHeaders,
      body: converseBody // Pass object, performJsonRequest will stringify it
    }, "Bedrock"); // Provider label for logging
    if (!response.ok) {
      const errorText = response.text; // Property, not method
      logger.error({
        status: response.status,
        error: errorText
      }, "=== BEDROCK CONVERSE API ERROR ===");
      throw new Error(`Bedrock Converse API failed: ${response.status} ${errorText}`);
    }

    // Converse API response (already parsed by performJsonRequest)
    const converseResponse = response.json; // Property, not method
    const message = converseResponse?.output?.message;
    if (!message || !Array.isArray(message.content)) {
      throw new Error("Bedrock Converse API returned an unexpected response (missing output.message.content)");
    }
    logger.debug({
      stopReason: converseResponse.stopReason,
      inputTokens: converseResponse.usage?.inputTokens || 0,
      outputTokens: converseResponse.usage?.outputTokens || 0,
      hasToolUse: message.content.some(c => c.toolUse)
    }, "=== BEDROCK CONVERSE API RESPONSE ===");

    // Convert Converse API response to Anthropic format
    const anthropicResponse = {
      id: `bedrock-${Date.now()}`,
      type: "message",
      role: message.role,
      model: modelId,
      content: message.content.map(item => {
        if (typeof item.text === "string") {
          return { type: "text", text: item.text };
        }
        if (item.toolUse) {
          return {
            type: "tool_use",
            id: item.toolUse.toolUseId,
            name: item.toolUse.name,
            input: item.toolUse.input
          };
        }
        // Unknown block kinds are passed through untouched.
        return item;
      }),
      stop_reason: passthroughStopReasons.has(converseResponse.stopReason)
        ? converseResponse.stopReason
        : "end_turn",
      usage: {
        input_tokens: converseResponse.usage?.inputTokens || 0,
        output_tokens: converseResponse.usage?.outputTokens || 0,
      },
    };
    return {
      ok: true,
      status: 200,
      json: anthropicResponse,
      actualProvider: "bedrock",
      modelFamily,
    };
  } catch (e) {
    logger.error({
      error: e.message,
      modelId,
      region: config.bedrock.region,
      endpoint,
      stack: e.stack
    }, "=== BEDROCK CONVERSE API ERROR ===");
    throw e;
  }
}
/**
* Z.AI (Zhipu) Provider
*
* Z.AI offers GLM models through an Anthropic-compatible API at ~1/7 the cost.
* Minimal transformation needed - mostly passthrough with model mapping.
*/
async function invokeZai(body) {
if (!config.zai?.apiKey) {
throw new Error("Z.AI API key is not configured. Set ZAI_API_KEY in your .env file.");
}
const endpoint = config.zai.endpoint || "https://api.z.ai/api/anthropic/v1/messages";
const isOpenAIFormat = endpoint.includes("/chat/completions");
// Model mapping: Anthropic names → Z.AI names (lowercase)
const modelMap = {
"claude-sonnet-4-5-20250929": "glm-4.7",
"claude-sonnet-4-5": "glm-4.7",
"claude-sonnet-4.5": "glm-4.7",
"claude-3-5-sonnet": "glm-4.7",
"claude-haiku-4-5-20251001": "glm-4.5-air",
"claude-haiku-4-5": "glm-4.5-air",
"claude-3-haiku": "glm-4.5-air",
};
const requestedModel = body._tierModel || body.model || config.zai.model;
let mappedModel = modelMap[requestedModel] || config.zai.model || "glm-4.7";
mappedModel = mappedModel.toLowerCase();
let zaiBody;
let headers;
if (isOpenAIFormat) {
const {
convertAnthropicToolsToOpenRouter,
convertAnthropicMessagesToOpenRouter
} = require("./openrouter-utils");
// Convert messages using existing utility
let messages = convertAnthropicMessagesToOpenRouter(body.messages || []);
// Extract system content from body.system OR from system messages in the array
let systemContent = "";
if (body.system) {
systemContent = Array.isArray(body.system)
? body.system.map(s => s.text || s).join("\n")
: body.system;
}
// Filter out any system role messages (Z.AI doesn't support system role)
// and collect their content
const filteredMessages = [];
for (const msg of messages) {
if (msg.role === "system") {
// Append system message content to systemContent
if (msg.content) {
systemContent = systemContent ? `${systemContent}\n${msg.content}` : msg.content;
}
} else {
filteredMessages.push(msg);
}
}
messages = filteredMessages;
// Prepend system content to first user message ONLY if no tools
// When tools are present, system instructions can confuse tool calling
const hasTools = Array.isArray(body.tools) && body.tools.length > 0;
if (systemContent && messages.length > 0 && !hasTools) {
const firstUserIdx = messages.findIndex(m => m.role === "user");
if (firstUserIdx >= 0) {
const firstUser = messages[firstUserIdx];
firstUser.content = `[System Instructions]\n${systemContent}\n\n[User Message]\n${firstUser.content}`;
} else {
// No user message, add system as user message
messages.unshift({ role: "user", content: systemContent });
}
} else if (systemContent && !hasTools) {
// No messages at all, add system as user
messages.push({ role: "user", content: systemContent });
}
// Convert tools if present
let tools = undefined;
if (Array.isArray(body.tools) && body.tools.length > 0) {
tools = convertAnthropicToolsToOpenRouter(body.tools);
}
zaiBody = {
model: mappedModel,
messages,
max_tokens: body.max_tokens || 16384,
temperature: body.temperature ?? 0.7,
stream: body.stream,
};
// Only add tools if present
if (tools && tools.length > 0) {
zaiBody.tools = tools;
// Use "auto" to let the model decide when to use tools
// "required" was forcing tools even for simple greetings
zaiBody.tool_choice = "auto";
// Also enable parallel tool calls
zaiBody.parallel_tool_calls = false; // Disable parallel tool calls - GPT often makes duplicate calls
}
headers = {
"Content-Type": "application/json",
"Authorization": `Bearer ${config.zai.apiKey}`,
};
} else {
// Anthropic format endpoint
zaiBody = { ...body };
zaiBody.model = mappedModel;
// Inject standard tools if client didn't send any (passthrough mode)
if (!Array.isArray(zaiBody.tools) || zaiBody.tools.length === 0) {
zaiBody.tools = STANDARD_TOOLS;
logger.debug