UNPKG

lynkr

Version:

Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.

1,507 lines (1,385 loc) 147 kB
const config = require("../config"); const { invokeModel } = require("../clients/databricks"); const { appendTurnToSession } = require("../sessions/record"); const { upsertSession } = require("../sessions/store"); const { executeToolCall } = require("../tools"); const policy = require("../policy"); const logger = require("../logger"); const { needsWebFallback } = require("../policy/web-fallback"); const promptCache = require("../cache/prompt"); const tokens = require("../utils/tokens"); const systemPrompt = require("../prompts/system"); const historyCompression = require("../context/compression"); const tokenBudget = require("../context/budget"); const { applyToonCompression } = require("../context/toon"); const { classifyRequestType, selectToolsSmartly } = require("../tools/smart-selection"); const { compressMessages: headroomCompress, isEnabled: isHeadroomEnabled } = require("../headroom"); const { createAuditLogger } = require("../logger/audit-logger"); const { getResolvedIp, runWithDnsContext } = require("../clients/dns-logger"); const { getShuttingDown } = require("../api/health"); const { tryPreflight, buildSatisfiedResponse: buildPreflightResponse } = require("./preflight"); const { detectBypass, buildBypassResponse } = require("./bypass"); const crypto = require("crypto"); const { asyncClone, asyncTransform, getPoolStats } = require("../workers/helpers"); const { getSemanticCache, isSemanticCacheEnabled } = require("../cache/semantic"); const lazyLoader = require("../tools/lazy-loader"); const { areSimilarToolCalls } = require("../clients/gpt-utils"); const { getModelRegistrySync } = require("../routing/model-registry"); /** * Get destination URL for audit logging based on provider type * @param {string} providerType - Provider type (databricks, azure-anthropic, etc) * @returns {string} - Destination URL */ function getDestinationUrl(providerType) { switch (providerType) { case 'databricks': return config.databricks?.url ?? 'unknown'; case 'azure-anthropic': return config.azureAnthropic?.endpoint ?? 'unknown'; case 'ollama': return config.ollama?.endpoint ?? 'unknown'; case 'azure-openai': return config.azureOpenAI?.endpoint ?? 'unknown'; case 'openrouter': return config.openrouter?.endpoint ?? 'unknown'; case 'openai': return 'https://api.openai.com/v1/chat/completions'; case 'llamacpp': return config.llamacpp?.endpoint ?? 'unknown'; case 'lmstudio': return config.lmstudio?.endpoint ?? 'unknown'; case 'bedrock': return config.bedrock?.endpoint ?? 'unknown'; case 'zai': return config.zai?.endpoint ?? 'unknown'; case 'vertex': return config.vertex?.endpoint ?? 'unknown'; case 'moonshot': return config.moonshot?.endpoint ?? 'unknown'; case 'codex': return 'codex://app-server (local process)'; default: return 'unknown'; } } const DROP_KEYS = new Set([ "provider", "api_type", "beta", "context_management", "stream", "max_steps", "max_duration_ms", ]); const DEFAULT_AZURE_TOOLS = Object.freeze([ { name: "WebSearch", input_schema: { type: "object", properties: { query: { type: "string", description: "Search query to execute.", }, }, required: ["query"], additionalProperties: false, }, }, { name: "WebFetch", input_schema: { type: "object", properties: { url: { type: "string", description: "URL to fetch.", }, prompt: { type: "string", description: "Optional summarisation prompt.", }, }, required: ["url"], additionalProperties: false, }, }, { name: "Bash", input_schema: { type: "object", properties: { command: { type: "string", description: "Shell command to execute.", }, timeout: { type: "integer", description: "Optional timeout in milliseconds.", }, }, required: ["command"], additionalProperties: false, }, }, { name: "BashOutput", input_schema: { type: "object", properties: { bash_id: { type: "string", description: "Identifier of the background bash process.", }, }, required: ["bash_id"], additionalProperties: false, }, }, { name: "KillShell", input_schema: { type: "object", properties: { shell_id: { type: "string", description: "Identifier of the background shell to terminate.", }, }, required: ["shell_id"], additionalProperties: false, }, }, ]); const PLACEHOLDER_WEB_RESULT_REGEX = /^Web search results for query:/i; function flattenBlocks(blocks) { if (!Array.isArray(blocks)) return String(blocks ?? ""); return blocks .map((block) => { if (!block) return ""; if (typeof block === "string") return block; if (block.type === "text" && typeof block.text === "string") return block.text; if (block.type === "tool_result") { const payload = block?.content ?? ""; return typeof payload === "string" ? payload : JSON.stringify(payload); } if (block.input_text) return block.input_text; return ""; }) .join(""); } function normaliseMessages(payload, options = {}) { const flattenContent = options.flattenContent !== false; const normalised = []; if (Array.isArray(payload.system) && payload.system.length) { const text = flattenBlocks(payload.system).trim(); if (text) normalised.push({ role: "system", content: text }); } if (Array.isArray(payload.messages)) { for (const message of payload.messages) { if (!message) continue; const role = message.role ?? "user"; const rawContent = message.content; let content; if (Array.isArray(rawContent)) { const hasToolBlocks = rawContent.some( (b) => b && (b.type === "tool_use" || b.type === "tool_result" || b.type === "document" || b.type === "image" || b.type === "thinking") ); if (hasToolBlocks) { content = rawContent.slice(); } else { content = flattenContent ? flattenBlocks(rawContent) : rawContent.slice(); } } else if (rawContent === undefined || rawContent === null) { content = flattenContent ? "" : rawContent; } else if (typeof rawContent === "string") { content = rawContent; } else if (flattenContent) { content = String(rawContent); } else { content = rawContent; } const entry = { role, content }; if (Array.isArray(message.tool_calls) && message.tool_calls.length > 0) { entry.tool_calls = message.tool_calls; } normalised.push(entry); } } return normalised; } function normaliseTools(tools) { if (!Array.isArray(tools) || tools.length === 0) return undefined; return tools.map((tool) => ({ type: "function", function: { name: tool.name || "unnamed_tool", description: tool.description || tool.name || "No description provided", parameters: tool.input_schema ?? {}, }, })); } /** * Ensure tools are in Anthropic format for Databricks/Claude API * Databricks expects: {name, description, input_schema} * NOT OpenAI format: {type: "function", function: {...}} */ function ensureAnthropicToolFormat(tools) { if (!Array.isArray(tools) || tools.length === 0) return undefined; return tools.map((tool) => { // Ensure input_schema has required 'type' field let input_schema = tool.input_schema || { type: "object", properties: {} }; // If input_schema exists but missing 'type', add it if (input_schema && !input_schema.type) { input_schema = { type: "object", ...input_schema }; } return { name: tool.name || "unnamed_tool", description: tool.description || tool.name || "No description provided", input_schema, }; }); } function stripPlaceholderWebSearchContent(message) { if (!message || message.content === undefined || message.content === null) { return message; } if (typeof message.content === "string") { return PLACEHOLDER_WEB_RESULT_REGEX.test(message.content.trim()) ? null : message; } if (!Array.isArray(message.content)) { return message; } const filtered = message.content.filter((block) => { if (!block) return false; if (block.type === "tool_result") { const content = typeof block.content === "string" ? block.content.trim() : ""; if (PLACEHOLDER_WEB_RESULT_REGEX.test(content)) { return false; } } if (block.type === "text" && typeof block.text === "string") { if (PLACEHOLDER_WEB_RESULT_REGEX.test(block.text.trim())) { return false; } } return true; }); if (filtered.length === 0) { return null; } if (filtered.length === message.content.length) { return message; } return { ...message, content: filtered, }; } function isPlaceholderToolResultMessage(message) { if (!message) return false; if (message.role !== "user" && message.role !== "tool") return false; if (typeof message.content === "string") { return PLACEHOLDER_WEB_RESULT_REGEX.test(message.content.trim()); } if (!Array.isArray(message.content) || message.content.length === 0) { return false; } return message.content.every((block) => { if (!block || block.type !== "tool_result") return false; const text = typeof block.content === "string" ? block.content.trim() : ""; return PLACEHOLDER_WEB_RESULT_REGEX.test(text); }); } function removeMatchingAssistantToolUse(cleanMessages, toolUseId) { if (!toolUseId || cleanMessages.length === 0) return; const lastIndex = cleanMessages.length - 1; const candidate = cleanMessages[lastIndex]; if (!candidate || candidate.role !== "assistant") return; if (Array.isArray(candidate.content)) { const remainingBlocks = candidate.content.filter((block) => { if (!block || block.type !== "tool_use") return true; return block.id !== toolUseId; }); if (remainingBlocks.length === 0) { cleanMessages.pop(); } else if (remainingBlocks.length !== candidate.content.length) { cleanMessages[lastIndex] = { ...candidate, content: remainingBlocks, }; } return; } if (Array.isArray(candidate.tool_calls)) { const remainingCalls = candidate.tool_calls.filter((call) => call.id !== toolUseId); if (remainingCalls.length === 0) { cleanMessages.pop(); } else if (remainingCalls.length !== candidate.tool_calls.length) { cleanMessages[lastIndex] = { ...candidate, tool_calls: remainingCalls, }; } } } const WEB_SEARCH_NORMALIZED = new Set(["websearch", "web_search", "web-search"]); function normaliseToolIdentifier(name = "") { return String(name).toLowerCase().replace(/[^a-z0-9]/g, ""); } function buildWebSearchSummary(rawContent, options = {}) { if (rawContent === undefined || rawContent === null) return null; let data = rawContent; if (typeof data === "string") { const trimmed = data.trim(); if (!trimmed) return null; try { data = JSON.parse(trimmed); } catch { return null; } } if (!data || typeof data !== "object") return null; const results = Array.isArray(data.results) ? data.results : []; if (results.length === 0) return null; const maxItems = Number.isInteger(options.maxItems) && options.maxItems > 0 ? options.maxItems : 5; const lines = []; for (let i = 0; i < results.length && lines.length < maxItems; i += 1) { const item = results[i]; if (!item || typeof item !== "object") continue; const title = item.title || item.name || item.url || item.href; const url = item.url || item.href || ""; const snippet = item.snippet || item.summary || item.excerpt || ""; if (!title && !snippet) continue; let line = `${lines.length + 1}. ${title ?? snippet}`; if (snippet && snippet !== title) { line += ` — ${snippet}`; } if (url) { line += ` (${url})`; } lines.push(line); } if (lines.length === 0) return null; return `Top search hits:\n${lines.join("\n")}`; } /** * Count tool_use and tool_result blocks in message history. * Only counts tools from the CURRENT TURN (after the last user text message). * This prevents the guard from blocking new questions after a previous loop. */ function countToolCallsInHistory(messages) { if (!Array.isArray(messages)) return { toolUseCount: 0, toolResultCount: 0 }; // Find the index of the last user message that contains actual text (not just tool_result) let lastUserTextIndex = -1; for (let i = messages.length - 1; i >= 0; i--) { const msg = messages[i]; if (msg?.role !== 'user') continue; // Check if this user message has actual text content (not just tool_result) if (typeof msg.content === 'string' && msg.content.trim().length > 0) { lastUserTextIndex = i; break; } if (Array.isArray(msg.content)) { const hasText = msg.content.some(block => (block?.type === 'text' && block?.text?.trim?.().length > 0) || (block?.type === 'input_text' && block?.input_text?.trim?.().length > 0) ); if (hasText) { lastUserTextIndex = i; break; } } } // Count only tool_use/tool_result AFTER the last user text message let toolUseCount = 0; let toolResultCount = 0; const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0; for (let i = startIndex; i < messages.length; i++) { const msg = messages[i]; if (!msg || !Array.isArray(msg.content)) continue; for (const block of msg.content) { if (block?.type === 'tool_use') toolUseCount++; if (block?.type === 'tool_result') toolResultCount++; } } return { toolUseCount, toolResultCount, lastUserTextIndex }; } /** * Inject a "stop looping" instruction if there are too many tool calls in history. * This helps prevent infinite loops when the model keeps calling tools instead of responding. * * @param {Array} messages - The conversation messages * @param {number} threshold - Max tool results before injection (default: 5) * @returns {Array} - Messages with stop instruction injected if needed */ function injectToolLoopStopInstruction(messages, threshold = 5) { if (!Array.isArray(messages)) return messages; const { toolResultCount } = countToolCallsInHistory(messages); if (toolResultCount >= threshold) { logger.warn({ toolResultCount, threshold, }, "[ToolLoopGuard] Too many tool results in conversation - injecting stop instruction"); // Inject instruction to stop tool calls and provide a final answer const stopInstruction = { role: "user", content: `⚠️ IMPORTANT: You have already executed ${toolResultCount} tool calls in this conversation. This is likely an infinite loop. STOP calling tools immediately and provide a direct text response to the user based on the information you have gathered. If you cannot complete the task, explain why. DO NOT call any more tools.`, }; // Add to end of messages return [...messages, stopInstruction]; } return messages; } // === CROSS-REQUEST TOOL CALL DEDUP TRACKING === // These helpers track tool call signatures across multiple HTTP requests within // the same session (client/passthrough mode). The inner-loop detection in // runAgentLoop() only sees one request at a time, so repeated calls across // requests escape it. const DEDUP_MAX_SIGNATURES = 50; const DEDUP_WARN_THRESHOLD = 5; const DEDUP_TERMINATE_THRESHOLD = 8; /** * Initialise session.metadata.toolCallDedup if missing. * @param {Object} session */ function ensureDedupStructure(session) { if (!session || !session.metadata) return; if (!session.metadata.toolCallDedup) { session.metadata.toolCallDedup = { signatures: {}, similarGroups: {}, lastResetAt: Date.now(), warningInjected: false, }; } } /** * Record a tool call into the cross-request dedup tracker. * Handles similarity merging and enforces the 50-entry cap. * @param {Object} session * @param {Object} toolCall - tool_use block (Anthropic format: { name, input, id }) */ function recordCrossRequestToolCall(session, toolCall) { if (!session?.metadata) return; ensureDedupStructure(session); const dedup = session.metadata.toolCallDedup; const signature = getToolCallSignature(toolCall); const toolName = toolCall.function?.name ?? toolCall.name ?? 'unknown'; const args = toolCall.function?.arguments ?? toolCall.input; const argsPreview = (typeof args === 'string' ? args : JSON.stringify(args ?? {})).substring(0, 200); const now = Date.now(); // Check if this signature maps to a canonical via similarity groups const canonicalSig = dedup.similarGroups[signature] || signature; if (dedup.signatures[canonicalSig]) { dedup.signatures[canonicalSig].count += 1; dedup.signatures[canonicalSig].lastSeen = now; } else { // Check for similar existing entries before creating a new one let mergedInto = null; for (const [existingSig, existingData] of Object.entries(dedup.signatures)) { // Build a fake call object from stored data to compare with areSimilarToolCalls const existingCall = { name: existingData.toolName, input: existingData.argsPreview, }; if (areSimilarToolCalls(toolCall, existingCall)) { // Merge: map this signature to the existing canonical dedup.similarGroups[signature] = existingSig; dedup.signatures[existingSig].count += 1; dedup.signatures[existingSig].lastSeen = now; mergedInto = existingSig; logger.debug({ newSignature: signature, canonicalSignature: existingSig, toolName, count: dedup.signatures[existingSig].count, }, "Cross-request tool dedup: merged similar call"); break; } } if (!mergedInto) { // New unique signature dedup.signatures[signature] = { count: 1, toolName, firstSeen: now, lastSeen: now, argsPreview, }; } } // Enforce cap: evict oldest entries if over limit const sigKeys = Object.keys(dedup.signatures); if (sigKeys.length > DEDUP_MAX_SIGNATURES) { const sorted = sigKeys.sort( (a, b) => dedup.signatures[a].lastSeen - dedup.signatures[b].lastSeen ); const toRemove = sorted.slice(0, sigKeys.length - DEDUP_MAX_SIGNATURES); for (const key of toRemove) { delete dedup.signatures[key]; // Also clean up any similarGroups pointing to this key for (const [groupSig, canonical] of Object.entries(dedup.similarGroups)) { if (canonical === key) delete dedup.similarGroups[groupSig]; } } } } /** * Return the highest dedup count, the associated tool name, and signature. * @param {Object} session * @returns {{ maxCount: number, toolName: string|null, signature: string|null }} */ function getMaxDedupCount(session) { if (!session?.metadata?.toolCallDedup?.signatures) { return { maxCount: 0, toolName: null, signature: null }; } const sigs = session.metadata.toolCallDedup.signatures; let maxCount = 0; let toolName = null; let signature = null; for (const [sig, data] of Object.entries(sigs)) { if (data.count > maxCount) { maxCount = data.count; toolName = data.toolName; signature = sig; } } return { maxCount, toolName, signature }; } /** * Extract tool_use blocks from messages that appear after the last user text message. * These are the tool calls from the current assistant turn that the client is sending back. * @param {Array} messages * @returns {Array} - Array of tool_use-like objects */ function extractToolUseFromCurrentTurn(messages) { if (!Array.isArray(messages)) return []; // Find last user text message let lastUserTextIndex = -1; for (let i = messages.length - 1; i >= 0; i--) { const msg = messages[i]; if (msg?.role !== 'user') continue; if (typeof msg.content === 'string' && msg.content.trim().length > 0) { lastUserTextIndex = i; break; } if (Array.isArray(msg.content)) { const hasText = msg.content.some(block => (block?.type === 'text' && block?.text?.trim?.().length > 0) || (block?.type === 'input_text' && block?.input_text?.trim?.().length > 0) ); if (hasText) { lastUserTextIndex = i; break; } } } const toolUseBlocks = []; const startIndex = lastUserTextIndex >= 0 ? lastUserTextIndex : 0; for (let i = startIndex; i < messages.length; i++) { const msg = messages[i]; if (msg?.role !== 'assistant') continue; if (!Array.isArray(msg.content)) continue; for (const block of msg.content) { if (block?.type === 'tool_use') { toolUseBlocks.push(block); } } } return toolUseBlocks; } /** * Reset dedup tracking. Called when a new user question is detected. * @param {Object} session */ function resetDedupTracking(session) { if (!session?.metadata) return; session.metadata.toolCallDedup = { signatures: {}, similarGroups: {}, lastResetAt: Date.now(), warningInjected: false, }; logger.debug({ sessionId: session?.id ?? null }, "Cross-request tool dedup: reset tracking for new user question"); } function sanitiseAzureTools(tools) { if (!Array.isArray(tools) || tools.length === 0) return undefined; const allowed = new Set([ "WebSearch", "Web_Search", "websearch", "web_search", "web-fetch", "webfetch", "web_fetch", "bash", "shell", "bash_output", "bashoutput", "kill_shell", "killshell", ]); const cleaned = new Map(); for (const tool of tools) { if (!tool || typeof tool !== "object") continue; const rawName = typeof tool.name === "string" ? tool.name.trim() : ""; if (!rawName) continue; const identifier = normaliseToolIdentifier(rawName); if (!allowed.has(identifier)) continue; if (cleaned.has(identifier)) continue; let schema = null; if (tool.input_schema && typeof tool.input_schema === "object") { schema = tool.input_schema; } else if (tool.parameters && typeof tool.parameters === "object") { schema = tool.parameters; } if (!schema || typeof schema !== "object") { schema = { type: "object" }; } cleaned.set(identifier, { name: rawName, input_schema: schema, }); } return cleaned.size > 0 ? Array.from(cleaned.values()) : undefined; } function parseToolArguments(toolCall) { if (!toolCall?.function?.arguments) return {}; const raw = toolCall.function.arguments; if (typeof raw !== "string") return raw ?? {}; try { return JSON.parse(raw); } catch { return {}; } } function parseExecutionContent(content) { if (content === undefined || content === null) { return null; } if (typeof content === "string") { const trimmed = content.trim(); if (trimmed.startsWith("{") || trimmed.startsWith("[")) { try { const parsed = JSON.parse(trimmed); // Handle Anthropic content blocks array - extract text if (Array.isArray(parsed)) { const textParts = parsed .filter(block => block && typeof block === 'object') .map(block => { if (block.type === 'text' && typeof block.text === 'string') { return block.text; } // Handle other block types gracefully if (block.text) return block.text; if (block.content) return typeof block.content === 'string' ? block.content : JSON.stringify(block.content); return null; }) .filter(text => text !== null); if (textParts.length > 0) { return textParts.join('\n'); } } return parsed; } catch { return content; } } return content; } // Handle content that's already an array (content blocks) if (Array.isArray(content)) { const textParts = content .filter(block => block && typeof block === 'object') .map(block => { if (block.type === 'text' && typeof block.text === 'string') { return block.text; } if (block.text) return block.text; if (block.content) return typeof block.content === 'string' ? block.content : JSON.stringify(block.content); return null; }) .filter(text => text !== null); if (textParts.length > 0) { return textParts.join('\n'); } } return content; } function createFallbackAssistantMessage(providerType, { text, toolCall }) { if (providerType === "azure-anthropic") { const blocks = []; if (typeof text === "string" && text.trim().length > 0) { blocks.push({ type: "text", text: text.trim() }); } blocks.push({ type: "tool_use", id: toolCall.id ?? `tool_${Date.now()}`, name: toolCall.function?.name ?? "tool", input: parseToolArguments(toolCall), }); return { role: "assistant", content: blocks, }; } return { role: "assistant", content: text ?? "", tool_calls: [ { id: toolCall.id, function: toolCall.function, }, ], }; } function createFallbackToolResultMessage(providerType, { toolCall, execution }) { const toolName = execution.name ?? toolCall.function?.name ?? "tool"; const toolId = execution.id ?? toolCall.id ?? `tool_${Date.now()}`; if (providerType === "azure-anthropic") { const parsed = parseExecutionContent(execution.content); let contentBlocks; if (typeof parsed === "string" || parsed === null) { contentBlocks = [ { type: "tool_result", tool_use_id: toolId, content: parsed ?? "", is_error: execution.ok === false, }, ]; } else { contentBlocks = [ { type: "tool_result", tool_use_id: toolId, content: JSON.stringify(parsed), is_error: execution.ok === false, }, ]; } return { role: "user", content: contentBlocks, }; } return { role: "tool", tool_call_id: toolId, name: toolCall.function?.name ?? toolName, content: execution.content, }; } function extractWebSearchUrls(messages, options = {}, toolNameLookup = new Map()) { const max = Number.isInteger(options.max) && options.max > 0 ? options.max : 10; const urls = []; const seen = new Set(); if (!Array.isArray(messages)) return urls; for (let i = messages.length - 1; i >= 0; i -= 1) { const message = messages[i]; if (!message) continue; if (Array.isArray(message.content)) { for (const part of message.content) { if (!part || part.type !== "tool_result") continue; const toolIdentifier = toolNameLookup.get(part.tool_use_id ?? "") ?? null; if (!toolIdentifier || !WEB_SEARCH_NORMALIZED.has(toolIdentifier)) continue; let data = part.content; if (typeof data === "string") { try { data = JSON.parse(data); } catch { continue; } } if (!data || typeof data !== "object") continue; const results = Array.isArray(data.results) ? data.results : []; for (const entry of results) { if (!entry || typeof entry !== "object") continue; const url = entry.url ?? entry.href ?? null; if (!url) continue; if (seen.has(url)) continue; seen.add(url); urls.push(url); if (urls.length >= max) return urls; } } continue; } if (message.role === "tool") { const toolIdentifier = normaliseToolIdentifier(message.name ?? ""); if (!WEB_SEARCH_NORMALIZED.has(toolIdentifier)) continue; let data = message.content; if (typeof data === "string") { try { data = JSON.parse(data); } catch { continue; } } if (!data || typeof data !== "object") continue; const results = Array.isArray(data.results) ? data.results : []; for (const entry of results) { if (!entry || typeof entry !== "object") continue; const url = entry.url ?? entry.href ?? null; if (!url) continue; if (seen.has(url)) continue; seen.add(url); urls.push(url); if (urls.length >= max) return urls; } continue; } } return urls; } function normaliseToolChoice(choice) { if (!choice) return undefined; if (typeof choice === "string") return choice; // "auto", "none" if (choice.type === "tool" && choice.name) { return { type: "function", function: { name: choice.name } }; } return undefined; } /** * Strip thinking-style reasoning from Ollama model outputs * Patterns to remove: * - Lines starting with bullet points (●, •, -, *) * - Explanatory reasoning before the actual response * - Multiple newlines used to separate thinking from response */ function stripThinkingBlocks(text) { if (typeof text !== "string") return text; // Split into lines const lines = text.split("\n"); const cleanedLines = []; let inThinkingBlock = false; let consecutiveEmptyLines = 0; for (const line of lines) { const trimmed = line.trim(); // Detect thinking block markers (bullet points followed by reasoning) if (/^[●•\-\*]\s/.test(trimmed)) { inThinkingBlock = true; continue; } // Empty lines might separate thinking from response if (trimmed === "") { consecutiveEmptyLines++; // If we've seen 2+ empty lines, likely end of thinking block if (consecutiveEmptyLines >= 2) { inThinkingBlock = false; } continue; } // Reset empty line counter consecutiveEmptyLines = 0; // Skip lines that are part of thinking block if (inThinkingBlock) { continue; } // Keep this line cleanedLines.push(line); } return cleanedLines.join("\n").trim(); } /** * Convert legacy Ollama /api/chat response to Anthropic Messages format. * Used when Ollama < v0.14.0 (no native Anthropic endpoint). */ function ollamaToAnthropicResponse(ollamaResponse, requestedModel) { const message = ollamaResponse?.message ?? {}; const rawContent = message.content || ""; const toolCalls = message.tool_calls || []; const contentItems = []; if (typeof rawContent === "string" && rawContent.trim()) { const cleanedContent = stripThinkingBlocks(rawContent); if (cleanedContent) { contentItems.push({ type: "text", text: cleanedContent }); } } // Convert tool calls from OpenAI function-calling format to Anthropic tool_use if (Array.isArray(toolCalls) && toolCalls.length > 0) { for (const toolCall of toolCalls) { const func = toolCall.function || {}; let input = {}; if (func.arguments) { if (typeof func.arguments === "string") { try { input = JSON.parse(func.arguments); } catch { input = {}; } } else if (typeof func.arguments === "object") { input = func.arguments; } } contentItems.push({ type: "tool_use", id: toolCall.id || `toolu_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`, name: func.name || "unknown", input, }); } } if (contentItems.length === 0) { contentItems.push({ type: "text", text: "" }); } const inputTokens = ollamaResponse.prompt_eval_count ?? 0; const outputTokens = ollamaResponse.eval_count ?? 0; return { id: `msg_${Date.now()}`, type: "message", role: "assistant", model: requestedModel, content: contentItems, stop_reason: toolCalls.length > 0 ? "tool_use" : ollamaResponse.done ? "end_turn" : "max_tokens", stop_sequence: null, usage: { input_tokens: inputTokens, output_tokens: outputTokens, cache_creation_input_tokens: 0, cache_read_input_tokens: 0, }, }; } function toAnthropicResponse(openai, requestedModel, wantsThinking) { const choice = openai?.choices?.[0]; const message = choice?.message ?? {}; const usage = openai?.usage ?? {}; const toolCalls = Array.isArray(message.tool_calls) ? message.tool_calls : []; const contentItems = []; // Pass through real reasoning_content as a thinking block const reasoningContent = typeof message.reasoning_content === "string" ? message.reasoning_content : ""; if (reasoningContent && wantsThinking) { contentItems.push({ type: "thinking", thinking: reasoningContent }); } else if (wantsThinking) { contentItems.push({ type: "thinking", thinking: "Reasoning not available from the backing model.", }); } if (toolCalls.length) { for (const call of toolCalls) { let input = {}; try { input = call.function?.arguments ? JSON.parse(call.function.arguments) : {}; } catch { input = {}; } contentItems.push({ type: "tool_use", id: call.id ?? `tool_${Date.now()}`, name: call.function?.name ?? "function", input, }); } } const textContent = message.content; if (typeof textContent === "string" && textContent.trim()) { contentItems.push({ type: "text", text: textContent }); } else if (Array.isArray(textContent)) { for (const part of textContent) { if (typeof part === "string") { contentItems.push({ type: "text", text: part }); } else if (part?.type === "text" && typeof part.text === "string") { contentItems.push({ type: "text", text: part.text }); } } } if (contentItems.length === 0) { contentItems.push({ type: "text", text: "" }); } return { id: openai.id ?? `msg_${Date.now()}`, type: "message", role: "assistant", model: requestedModel, content: contentItems, stop_reason: choice?.finish_reason === "stop" ? "end_turn" : choice?.finish_reason === "length" ? "max_tokens" : choice?.finish_reason === "tool_calls" ? "tool_use" : choice?.finish_reason ?? "end_turn", stop_sequence: null, usage: { input_tokens: usage.prompt_tokens ?? 0, output_tokens: usage.completion_tokens ?? 0, cache_creation_input_tokens: 0, cache_read_input_tokens: 0, }, }; } function sanitizePayload(payload) { const { clonePayloadSmart } = require("../utils/payload"); const providerType = config.modelProvider?.type ?? "databricks"; const willFlatten = providerType !== "azure-anthropic"; const clean = clonePayloadSmart(payload ?? {}, { willFlatten }); const requestedModel = (typeof payload?.model === "string" && payload.model.trim().length > 0 ? payload.model.trim() : null) ?? config.modelProvider?.defaultModel ?? "databricks-claude-sonnet-4-5"; clean.model = requestedModel; if (!clean.max_tokens) { clean.max_tokens = 16384; } const flattenContent = willFlatten; clean.messages = normaliseMessages(clean, { flattenContent }).filter((msg) => { const hasToolCalls = Array.isArray(msg?.tool_calls) && msg.tool_calls.length > 0; if (!msg?.content) { return hasToolCalls; } if (typeof msg.content === "string") { return hasToolCalls || msg.content.trim().length > 0; } if (Array.isArray(msg.content)) { return hasToolCalls || msg.content.length > 0; } if (typeof msg.content === "object" && msg.content !== null) { return hasToolCalls || Object.keys(msg.content).length > 0; } return hasToolCalls; }); if (providerType === "azure-anthropic") { const cleanedMessages = []; for (const message of clean.messages) { if (isPlaceholderToolResultMessage(message)) { let toolUseId = null; if (Array.isArray(message.content)) { for (const block of message.content) { if (block?.type === "tool_result" && block.tool_use_id) { toolUseId = block.tool_use_id; break; } } } removeMatchingAssistantToolUse(cleanedMessages, toolUseId); continue; } const stripped = stripPlaceholderWebSearchContent(message); if (stripped) { cleanedMessages.push(stripped); } } clean.messages = cleanedMessages; const systemChunks = []; clean.messages = clean.messages.filter((msg) => { if (msg?.role === "tool") { return false; } if (msg?.role === "system") { if (typeof msg.content === "string" && msg.content.trim().length > 0) { systemChunks.push(msg.content.trim()); } return false; } return true; }); if (systemChunks.length > 0) { clean.system = systemChunks.join("\n\n"); } else if (typeof clean.system === "string" && clean.system.trim().length > 0) { clean.system = clean.system.trim(); } else { delete clean.system; } const azureDefaultModel = config.modelProvider?.defaultModel && config.modelProvider.defaultModel.trim().length > 0 ? config.modelProvider.defaultModel.trim() : "claude-opus-4-5"; clean.model = azureDefaultModel; } else if (providerType === "ollama") { // Ollama format conversion // Check if model supports tools const { modelNameSupportsTools } = require("../clients/ollama-utils"); const modelSupportsTools = modelNameSupportsTools(config.ollama?.model); if (!modelSupportsTools) { // Filter out tool_result content blocks for models without tool support clean.messages = clean.messages .map((msg) => { if (Array.isArray(msg.content)) { // Filter out tool_use and tool_result blocks const textBlocks = msg.content.filter( (block) => block.type === "text" && block.text ); if (textBlocks.length > 0) { // Convert to simple string format for Ollama return { role: msg.role, content: textBlocks.map((b) => b.text).join("\n"), }; } return null; } return msg; }) .filter(Boolean); } else { // Keep tool blocks for tool-capable models // But flatten content to simple string for better compatibility clean.messages = clean.messages.map((msg) => { if (Array.isArray(msg.content)) { const textBlocks = msg.content.filter( (block) => block.type === "text" && block.text ); if (textBlocks.length > 0) { return { role: msg.role, content: textBlocks.map((b) => b.text).join("\n"), }; } } return msg; }); } // Keep system prompt separate for Ollama (same as other providers) // Let invokeOllama() handle body.system properly } else { delete clean.system; } DROP_KEYS.forEach((key) => delete clean[key]); // Conditionally keep or strip the `thinking` parameter based on provider const { getThinkingBehavior } = require("../clients/provider-capabilities"); const thinkingBehavior = getThinkingBehavior(providerType, clean.model); if (clean.thinking && thinkingBehavior !== "native") { delete clean.thinking; } if (Array.isArray(clean.tools) && clean.tools.length === 0) { delete clean.tools; } else if (providerType === "databricks") { const tools = normaliseTools(clean.tools); if (tools) clean.tools = tools; else delete clean.tools; } else if (providerType === "azure-anthropic") { const tools = sanitiseAzureTools(clean.tools); clean.tools = tools && tools.length > 0 ? tools : DEFAULT_AZURE_TOOLS.map((tool) => ({ name: tool.name, input_schema: JSON.parse(JSON.stringify(tool.input_schema)), })); delete clean.tool_choice; } else if (providerType === "ollama") { // Check if model supports tools const { modelNameSupportsTools } = require("../clients/ollama-utils"); const modelSupportsTools = modelNameSupportsTools(config.ollama?.model); // Check if this is a simple conversational message (no tools needed) const isConversational = (() => { if (!Array.isArray(clean.messages) || clean.messages.length === 0) { return false; } const lastMessage = clean.messages[clean.messages.length - 1]; if (lastMessage?.role !== "user") { return false; } const content = typeof lastMessage.content === "string" ? lastMessage.content : ""; const trimmed = content.trim().toLowerCase(); // Simple greetings if (/^(hi|hello|hey|good morning|good afternoon|good evening|howdy|greetings)[\s\.\!\?]*$/.test(trimmed)) { return "greeting"; } // Conversational phrases that don't need tools (thanks, farewells, acknowledgements) if (/^(thanks|thank you|thx|ty|bye|goodbye|see you|ok|okay|cool|nice|great|awesome|sure|got it|sounds good|no worries|np|cheers)[\s\.\!\?]*$/.test(trimmed)) { return "conversational"; } return false; })(); if (isConversational) { // Strip all tools for simple conversational messages delete clean.tools; delete clean.tool_choice; logger.debug({ model: config.ollama?.model, reason: isConversational, }, "Ollama conversational mode - tools removed"); } else if (modelSupportsTools && Array.isArray(clean.tools) && clean.tools.length > 0) { // Keep all tools — Ollama receives them in Anthropic format (native API) // or they get converted to OpenAI format in invokeOllama (legacy API) clean.tools = ensureAnthropicToolFormat(clean.tools); } else { // Remove tools for models without tool support delete clean.tools; delete clean.tool_choice; } } else if (providerType === "openrouter") { // OpenRouter supports tools - keep them as-is // Tools are already in Anthropic format and will be converted by openrouter-utils if (!Array.isArray(clean.tools) || clean.tools.length === 0) { delete clean.tools; } } else if (providerType === "zai") { // Z.AI (Zhipu) supports tools - keep them in Anthropic format // They will be converted to OpenAI format in invokeZai if (!Array.isArray(clean.tools) || clean.tools.length === 0) { delete clean.tools; } else { // Ensure tools are in Anthropic format clean.tools = ensureAnthropicToolFormat(clean.tools); } } else if (providerType === "vertex") { // Vertex AI supports tools - keep them in Anthropic format if (!Array.isArray(clean.tools) || clean.tools.length === 0) { delete clean.tools; } else { clean.tools = ensureAnthropicToolFormat(clean.tools); } } else if (providerType === "moonshot") { // Moonshot supports tools - keep them in Anthropic format // They will be converted to OpenAI format in invokeMoonshot if (!Array.isArray(clean.tools) || clean.tools.length === 0) { delete clean.tools; } else { clean.tools = ensureAnthropicToolFormat(clean.tools); } } else if (Array.isArray(clean.tools)) { // Unknown provider - remove tools for safety delete clean.tools; } if (providerType === "databricks") { const toolChoice = normaliseToolChoice(clean.tool_choice); if (toolChoice !== undefined) clean.tool_choice = toolChoice; else delete clean.tool_choice; } else if (providerType === "ollama") { // Tool choice handling const { modelNameSupportsTools } = require("../clients/ollama-utils"); const modelSupportsTools = modelNameSupportsTools(config.ollama?.model); if (!modelSupportsTools) { delete clean.tool_choice; } // For tool-capable models, Ollama doesn't support tool_choice, so remove it delete clean.tool_choice; } else if (clean.tool_choice === undefined || clean.tool_choice === null) { delete clean.tool_choice; } // Smart tool selection (server mode only). In client/passthrough mode the // client (e.g. Claude Code) owns tool execution, so stripping its tools would // make the model emit calls for tools we removed — they then get dropped as // "hallucinated" and the session makes no progress. Pass tools through intact. const inClientMode = config.toolExecutionMode === "client" || config.toolExecutionMode === "passthrough"; if (!inClientMode && config.smartToolSelection?.enabled && Array.isArray(clean.tools) && clean.tools.length > 0) { const classification = classifyRequestType(clean); const selectedTools = selectToolsSmartly(clean.tools, classification, { provider: providerType, tokenBudget: config.smartToolSelection.tokenBudget, config: config.smartToolSelection }); // Only log if tools were actually filtered (avoid logging overhead) if (selectedTools.length !== clean.tools.length) { logger.info({ requestType: classification.type, originalCount: clean.tools.length, selectedCount: selectedTools.length, provider: providerType }, "Smart tool selection applied"); } clean.tools = selectedTools.length > 0 ? selectedTools : undefined; } // Always false: the agent loop needs buffered JSON to parse tool calls. // Lynkr synthesises SSE back to the client from the buffered response. clean.stream = false; if ( config.modelProvider?.type === "azure-anthropic" && logger && typeof logger.debug === "function" ) { try { logger.debug( { model: clean.model, temperature: clean.temperature ?? null, max_tokens: clean.max_tokens ?? null, tool_count: Array.isArray(clean.tools) ? clean.tools.length : 0, has_tool_choice: clean.tool_choice !== undefined, messages: clean.messages, }, "Azure Anthropic sanitized payload", ); logger.debug( { payload: JSON.parse(JSON.stringify(clean)), }, "Azure Anthropic request payload", ); } catch (err) { logger.debug({ err }, "Failed logging Azure Anthropic payload"); } } // Optional TOON conversion for large JSON message payloads (prompt context only). // Run this BEFORE message coalescing to preserve parseable JSON boundaries. applyToonCompression(clean, config.toon, { logger }); // FIX: Handle consecutive messages with the same role (causes llama.cpp 400 error) // Strategy: Merge consecutive same-role messages, but NEVER merge messages // that contain tool_use or tool_result blocks — they must stay intact for // the provider's tool-call protocol. if (Array.isArray(clean.messages) && clean.messages.length > 0) { const merged = []; const messages = clean.messages; const hasToolContent = (msg) => { if (Array.isArray(msg?.content)) { return msg.content.some(b => b && (b.type === 'tool_use' || b.type === 'tool_result')); } return Array.isArray(msg?.tool_calls) && msg.tool_calls.length > 0; }; for (let i = 0; i < messages.length; i++) { const msg = messages[i]; const prev = merged.length > 0 ? merged[merged.length - 1] : null; if (prev && msg.role === prev.role && !hasToolContent(msg) && !hasToolContent(prev)) { const prevContent = typeof prev.content === 'string' ? prev.content : JSON.stringify(prev.content); const currContent = typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content); prev.content = prevContent + '\n\n' + currContent; } else { merged.push({ ...msg }); } } if (merged.length !== clean.messages.length) { logger.debug({ originalCount: clean.messages.length, mergedCount: merged.length, }, 'Merged consecutive messages with same role'); } clean.messages = merged; } logger.debug({ providerType: config.modelProvider?.type ?? "databricks", messageCount: clean.messages?.length ?? 0, toolCount: clean.tools?.length ?? 0 }, 'After sanitizePayload'); // === Suggestion mode: tag request and override model if configured === const { isSuggestionMode: isSuggestion } = detectSuggestionMode(clean.messages); clean._requestMode = isSuggestion ? "suggestion" : "main"; const smConfig = config.modelProvider?.suggestionModeModel ?? "default"; if (isSuggestion && smConfig.toLowerCase() !== "default" && smConfig.toLowerCase() !== "none") { clean.model = smConfig; clean._suggestionModeModel = smConfig; } return clean; } const DEFAULT_LOOP_OPTIONS = { maxSteps: config.policy.maxStepsPerTurn ?? 6, maxDurationMs: 120000, maxToolCallsPerRequest: config.policy.maxToolCallsPerRequest ?? 20, // Prevent runaway tool calling }; function resolveLoopOptions(options = {}) { const maxSteps = Number.isInteger(options.maxSteps) && options.maxSteps > 0 ? options.maxSteps : DEFAULT_LOOP_OPTIONS.maxSteps; const maxDurationMs = Number.isInteger(options.maxDurationMs) && options.maxDurationMs > 0 ? options.maxDurationMs : DEFAULT_LOOP_OPTIONS.maxDurationMs; const maxToolCallsPerRequest = Number.isInteger(options.maxToolCallsPerRequest) && options.maxToolCallsPerRequest > 0 ? options.maxToolCallsPerRequest : DEFAULT_LOOP_OPTIONS.maxToolCallsPerRequest; return { ...DEFAULT_LOOP_OPTIONS, maxSteps, maxDurationMs, maxToolCallsPerR