UNPKG

lynkr

Version:

Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.

1,603 lines (1,446 loc) 66.5 kB
/** * OpenAI API Compatibility Router * * Implements OpenAI API endpoints for Cursor IDE compatibility. * Routes: * - POST /v1/chat/completions - Chat API with streaming support * - GET /v1/models - List available models * - POST /v1/embeddings - Generate embeddings (via OpenRouter or OpenAI) * - GET /v1/health - Health check * * Note: If MODEL_PROVIDER=openrouter, the same OPENROUTER_API_KEY is used * for both chat completions and embeddings - no additional configuration needed. * * @module api/openai-router */ const express = require("express"); const logger = require("../logger"); const config = require("../config"); const orchestrator = require("../orchestrator"); const { getSession } = require("../sessions"); const { convertOpenAIToAnthropic, convertAnthropicToOpenAI } = require("../clients/openai-format"); const { IDE_SAFE_TOOLS } = require("../clients/standard-tools"); const router = express.Router(); /** * Resolve the model name for OpenAI responses. * In OpenClaw mode, returns the actual provider/model from routing metadata. 
*/ function resolveResponseModel(resultBody, requestModel) { if (config.openclaw?.enabled && resultBody?._routingMeta) { const meta = resultBody._routingMeta; if (meta.provider && meta.model) { return `${meta.provider}/${meta.model}`; } if (meta.provider) { return meta.provider; } } return requestModel; } /** * Client detection - identifies which AI coding tool is making the request * @param {Object} headers - Request headers * @returns {string} Client type: 'codex', 'cline', 'continue', or 'unknown' */ function detectClient(headers) { const userAgent = (headers?.["user-agent"] || "").toLowerCase(); const clientHeader = (headers?.["x-client"] || headers?.["x-client-name"] || "").toLowerCase(); // Check user-agent and custom headers if (userAgent.includes("codex") || clientHeader.includes("codex") || userAgent.includes("openai-codex")) { return "codex"; } // Kilo Code is a fork of Cline - check for both if (userAgent.includes("kilo") || clientHeader.includes("kilo")) { return "kilo"; } if (userAgent.includes("cline") || clientHeader.includes("cline") || userAgent.includes("claude-dev")) { return "cline"; } if (userAgent.includes("continue") || clientHeader.includes("continue")) { return "continue"; } return "unknown"; } /** * Tool mappings for different AI coding clients * Each client has different tool names and parameter schemas */ const CLIENT_TOOL_MAPPINGS = { // ============== CODEX CLI ============== // Codex v0.121.0 only recognises "shell" and "apply_patch" as built-in // tools. All other operations (read, list, grep, etc.) must go through // shell commands — the model handles this naturally. 
codex: { "Bash": { name: "shell", mapArgs: (a) => ({ command: ["bash", "-c", a.command || ""] }) }, "Edit": { name: "apply_patch", mapArgs: (a) => ({ path: a.file_path || a.path || "", old_string: a.old_string || "", new_string: a.new_string || "" }) } }, // ============== CLINE (VS Code Extension) ============== // Tools: execute_command, read_file, write_to_file, replace_in_file, search_files, list_files cline: { "Bash": { name: "execute_command", mapArgs: (a) => ({ command: a.command || "", requires_approval: false }) }, "Read": { name: "read_file", mapArgs: (a) => ({ path: a.file_path || a.path || "" }) }, "Write": { name: "write_to_file", mapArgs: (a) => ({ path: a.file_path || a.path || "", content: a.content || "" }) }, "Edit": { name: "replace_in_file", mapArgs: (a) => ({ path: a.file_path || a.path || "", old_str: a.old_string || "", new_str: a.new_string || "" }) }, "Glob": { name: "list_files", mapArgs: (a) => ({ path: a.path || ".", recursive: true }) }, "Grep": { name: "search_files", mapArgs: (a) => ({ path: a.path || ".", regex: a.pattern || "", file_pattern: a.glob || "*" }) }, "ListDir": { name: "list_files", mapArgs: (a) => ({ path: a.path || a.directory || ".", recursive: false }) }, "WebAgent": { name: "web_agent", mapArgs: (a) => ({ url: a.url || "", goal: a.goal || "" }) } }, // ============== KILO CODE (Fork of Cline) ============== // Tools: execute_command, read_file, write_to_file, apply_diff, list_files, search_files, codebase_search kilo: { "Bash": { name: "execute_command", mapArgs: (a) => ({ command: a.command || "", requires_approval: false }) }, "Read": { name: "read_file", mapArgs: (a) => ({ path: a.file_path || a.path || "" }) }, "Write": { name: "write_to_file", mapArgs: (a) => ({ path: a.file_path || a.path || "", content: a.content || "" }) }, "Edit": { name: "apply_diff", mapArgs: (a) => ({ path: a.file_path || a.path || "", diff: a.old_string && a.new_string ? 
`--- a/${a.file_path || a.path}\n+++ b/${a.file_path || a.path}\n@@ -1 +1 @@\n-${a.old_string}\n+${a.new_string}` : "" }) }, "Glob": { name: "list_files", mapArgs: (a) => ({ path: a.path || ".", recursive: true }) }, "Grep": { name: "search_files", mapArgs: (a) => ({ path: a.path || ".", regex: a.pattern || "", file_pattern: a.glob || "*" }) }, "ListDir": { name: "list_files", mapArgs: (a) => ({ path: a.path || a.directory || ".", recursive: false }) }, "WebAgent": { name: "web_agent", mapArgs: (a) => ({ url: a.url || "", goal: a.goal || "" }) } }, // ============== CONTINUE.DEV ============== // Tools: read_file, create_new_file, exact_search, read_currently_open_file continue: { "Bash": { name: "run_terminal_command", mapArgs: (a) => ({ command: a.command || "" }) }, "Read": { name: "read_file", mapArgs: (a) => ({ filepath: a.file_path || a.path || "" }) }, "Write": { name: "create_new_file", mapArgs: (a) => ({ filepath: a.file_path || a.path || "", contents: a.content || "" }) }, "Edit": { name: "edit_existing_file", mapArgs: (a) => ({ filepath: a.file_path || a.path || "", old_string: a.old_string || "", new_string: a.new_string || "" }) }, "Glob": { name: "exact_search", mapArgs: (a) => ({ query: a.pattern || "" }) }, "Grep": { name: "exact_search", mapArgs: (a) => ({ query: a.pattern || "" }) }, "ListDir": { name: "read_file", mapArgs: (a) => ({ filepath: a.path || a.directory || "." 
}) }, "WebAgent": { name: "web_agent", mapArgs: (a) => ({ url: a.url || "", goal: a.goal || "" }) } } }; /** * Map Lynkr tool names and arguments to client-specific equivalents * @param {string} toolName - Lynkr tool name * @param {string} argsJson - JSON string of tool arguments * @param {string} clientType - Client type (codex, cline, continue) * @returns {{ name: string, arguments: string }} Mapped tool name and arguments */ function mapToolForClient(toolName, argsJson, clientType) { let args = {}; try { args = JSON.parse(argsJson || "{}"); } catch (e) { args = {}; } const clientMappings = CLIENT_TOOL_MAPPINGS[clientType]; if (!clientMappings) { // Unknown client - return as-is return { name: toolName, arguments: argsJson }; } const mapping = clientMappings[toolName]; if (mapping) { const mappedArgs = mapping.mapArgs(args); // Remove undefined values Object.keys(mappedArgs).forEach(key => { if (mappedArgs[key] === undefined) { delete mappedArgs[key]; } }); return { name: mapping.name, arguments: JSON.stringify(mappedArgs) }; } // No mapping found - return as-is (lowercase for convention) return { name: toolName.toLowerCase(), arguments: argsJson }; } /** * POST /v1/chat/completions * * OpenAI-compatible chat completions endpoint. * Converts OpenAI format → Anthropic → processes → converts back to OpenAI format. 
*/ router.post("/chat/completions", async (req, res) => { const startTime = Date.now(); const sessionId = req.headers["x-session-id"] || req.headers["authorization"]?.split(" ")[1] || "openai-session"; try { // Validate request body exists if (!req.body || typeof req.body !== 'object') { logger.error({ body: req.body, bodyType: typeof req.body }, "Invalid or missing request body"); return res.status(400).json({ error: { message: "Invalid or missing request body", type: "invalid_request_error", code: "invalid_body" } }); } // Validate required fields if (!req.body.messages || !Array.isArray(req.body.messages)) { logger.error({ hasMessages: !!req.body.messages }, "Missing or invalid messages array"); return res.status(400).json({ error: { message: "Missing required field: messages (must be an array)", type: "invalid_request_error", code: "missing_messages" } }); } // DEBUG: Log full message details to diagnose Codex caching issue const messagesSummary = (req.body.messages || []).map((m, i) => ({ index: i, role: m.role, contentPreview: typeof m.content === 'string' ? m.content.substring(0, 200) : (m.content == null ? null : (JSON.stringify(m.content) ?? '').substring(0, 200)) })); logger.debug({ endpoint: "/v1/chat/completions", model: req.body.model, messageCount: req.body.messages?.length, stream: req.body.stream || false, hasTools: !!req.body.tools, toolCount: req.body.tools?.length || 0, hasMessages: !!req.body.messages, messagesType: typeof req.body.messages, requestBodyKeys: Object.keys(req.body), // Log first 500 chars of body for debugging requestBodyPreview: JSON.stringify(req.body).substring(0, 500), // DEBUG: Full messages breakdown messages: messagesSummary }, "=== OPENAI CHAT COMPLETION REQUEST ==="); // Convert OpenAI request to Anthropic format const anthropicRequest = convertOpenAIToAnthropic(req.body); // Inject tools if client didn't send any. // Two-layer filtering: // 1. 
IDE_SAFE_TOOLS = STANDARD_TOOLS minus AskUserQuestion (can't work through proxy) // 2. For known clients (codex, cline, etc.), further filter to only tools // that have a mapping in CLIENT_TOOL_MAPPINGS — this ensures clients like // Codex don't see tools they can't handle (Task, WebFetch, NotebookEdit) // while Claude Code (unknown client) gets the full IDE_SAFE_TOOLS set. // Skip injection if client explicitly opted out (tool_choice: "none" or empty tools array). const clientType = detectClient(req.headers); const clientExplicitlyDisabledTools = req.body.tool_choice === "none" || Array.isArray(req.body.tools); if (!clientExplicitlyDisabledTools && (!anthropicRequest.tools || anthropicRequest.tools.length === 0)) { const clientMappings = CLIENT_TOOL_MAPPINGS[clientType]; let clientTools = clientMappings ? IDE_SAFE_TOOLS.filter(t => clientMappings[t.name]) : IDE_SAFE_TOOLS; anthropicRequest.tools = clientTools; logger.debug({ clientType, injectedToolCount: clientTools.length, injectedToolNames: clientTools.map(t => t.name), reason: clientMappings ? 
`Known client '${clientType}' — filtered to mapped tools only` : "Unknown client — injecting full IDE_SAFE_TOOLS" }, "=== INJECTING TOOLS ==="); } // Get or create session const session = getSession(sessionId); // Handle streaming vs non-streaming if (req.body.stream) { // Set up SSE headers for streaming res.setHeader("Content-Type", "text/event-stream"); res.setHeader("Cache-Control", "no-cache"); res.setHeader("Connection", "keep-alive"); res.setHeader("X-Accel-Buffering", "no"); // Prevent nginx buffering res.flushHeaders(); // Ensure headers are sent immediately try { // For streaming, we need to handle it differently - convert to non-streaming temporarily // Get non-streaming response from orchestrator anthropicRequest.stream = false; // Force non-streaming from orchestrator const result = await orchestrator.processMessage({ payload: anthropicRequest, headers: req.headers, session: session, options: { maxSteps: req.body?.max_steps } }); // Check if we have a valid response body logger.debug({ hasResult: !!result, resultKeys: result ? Object.keys(result) : null, hasBody: result && !!result.body, bodyType: result && result.body ? typeof result.body : null, bodyKeys: result && result.body ? Object.keys(result.body) : null, status: result?.status, terminationReason: result?.terminationReason }, "=== ORCHESTRATOR RESULT STRUCTURE ==="); if (!result || !result.body) { logger.error({ result: result ? JSON.stringify(result).substring(0, 500) : "null", resultKeys: result ? 
Object.keys(result) : null }, "Invalid orchestrator response for streaming"); throw new Error("Invalid response from orchestrator"); } // Convert to OpenAI format const streamModel = resolveResponseModel(result.body, req.body.model); const openaiResponse = convertAnthropicToOpenAI(result.body, streamModel); // Debug: Log what we're about to stream logger.debug({ openaiResponseId: openaiResponse.id, messageContent: openaiResponse.choices[0]?.message?.content?.substring(0, 100), contentLength: openaiResponse.choices[0]?.message?.content?.length || 0, finishReason: openaiResponse.choices[0]?.finish_reason, hasToolCalls: !!openaiResponse.choices[0]?.message?.tool_calls, resultBodyKeys: Object.keys(result.body || {}), resultBodyContent: JSON.stringify(result.body?.content)?.substring(0, 200) }, "=== PREPARING TO STREAM ==="); // Simulate streaming by sending the complete response as chunks const content = openaiResponse.choices[0].message.content || ""; let toolCalls = openaiResponse.choices[0].message.tool_calls; // Map tool names for known IDE clients if (clientType !== "unknown" && toolCalls && toolCalls.length > 0) { toolCalls = toolCalls.map(tc => { const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType); return { ...tc, function: { name: mapped.name, arguments: mapped.arguments } }; }); logger.debug({ mappedTools: toolCalls.map(t => t.function?.name), clientType }, "Tool names mapped for streaming chat/completions"); } // Send start chunk with role const startChunk = { id: openaiResponse.id, object: "chat.completion.chunk", created: openaiResponse.created, model: streamModel, system_fingerprint: "fp_lynkr", choices: [{ index: 0, delta: { role: "assistant", content: "" }, logprobs: null, finish_reason: null }] }; logger.debug({ chunk: "start", contentLength: content.length }, "Sending start chunk"); const startWriteOk = res.write(`data: ${JSON.stringify(startChunk)}\n\n`); if (!startWriteOk) { logger.warn("Start chunk 
write returned false (backpressure)"); } // Send content in a single chunk (or character by character for true streaming simulation) if (content) { const contentChunk = { id: openaiResponse.id, object: "chat.completion.chunk", created: openaiResponse.created, model: streamModel, system_fingerprint: "fp_lynkr", choices: [{ index: 0, delta: { content: content }, logprobs: null, finish_reason: null }] }; const contentWriteOk = res.write(`data: ${JSON.stringify(contentChunk)}\n\n`); logger.debug({ contentPreview: content.substring(0, 50), writeOk: contentWriteOk }, "Sent content chunk"); } // Send tool calls if present if (toolCalls && toolCalls.length > 0) { for (const toolCall of toolCalls) { const toolChunk = { id: openaiResponse.id, object: "chat.completion.chunk", created: openaiResponse.created, model: streamModel, choices: [{ index: 0, delta: { tool_calls: [{ index: 0, id: toolCall.id, type: "function", function: { name: toolCall.function.name, arguments: toolCall.function.arguments } }] }, finish_reason: null }] }; res.write(`data: ${JSON.stringify(toolChunk)}\n\n`); logger.debug({ toolName: toolCall.function.name }, "Sent tool call chunk"); } } // Send finish chunk const finishChunk = { id: openaiResponse.id, object: "chat.completion.chunk", created: openaiResponse.created, model: streamModel, system_fingerprint: "fp_lynkr", choices: [{ index: 0, delta: {}, logprobs: null, finish_reason: openaiResponse.choices[0].finish_reason }] }; logger.debug({ chunk: "finish", finishReason: openaiResponse.choices[0].finish_reason }, "Sending finish chunk"); res.write(`data: ${JSON.stringify(finishChunk)}\n\n`); res.write("data: [DONE]\n\n"); // Ensure data is flushed before ending logger.debug({ contentLength: content.length, contentPreview: content.substring(0, 50) }, "=== SSE STREAM COMPLETE ==="); res.end(); logger.info({ duration: Date.now() - startTime, mode: "streaming", inputTokens: openaiResponse.usage.prompt_tokens, outputTokens: 
openaiResponse.usage.completion_tokens }, "OpenAI streaming completed"); } catch (streamError) { logger.error({ error: streamError.message, stack: streamError.stack }, "=== STREAMING ERROR ==="); // Send error in OpenAI streaming format const errorChunk = { id: `chatcmpl-error-${Date.now()}`, object: "chat.completion.chunk", created: Math.floor(Date.now() / 1000), model: req.body.model, choices: [{ index: 0, delta: { role: "assistant", content: `Error: ${streamError.message}` }, finish_reason: "stop" }] }; res.write(`data: ${JSON.stringify(errorChunk)}\n\n`); res.write("data: [DONE]\n\n"); res.end(); } } else { // Non-streaming mode const result = await orchestrator.processMessage({ payload: anthropicRequest, headers: req.headers, session: session, options: { maxSteps: req.body?.max_steps } }); // Debug logging logger.debug({ resultKeys: Object.keys(result || {}), hasBody: !!result?.body, bodyType: typeof result?.body, bodyKeys: result?.body ? Object.keys(result.body) : null }, "Orchestrator result structure"); // Convert Anthropic response to OpenAI format const openaiResponse = convertAnthropicToOpenAI(result.body, resolveResponseModel(result.body, req.body.model)); // Map tool names for known IDE clients if (clientType !== "unknown" && openaiResponse.choices?.[0]?.message?.tool_calls?.length > 0) { openaiResponse.choices[0].message.tool_calls = openaiResponse.choices[0].message.tool_calls.map(tc => { const mapped = mapToolForClient(tc.function?.name || "", tc.function?.arguments || "{}", clientType); return { ...tc, function: { name: mapped.name, arguments: mapped.arguments } }; }); logger.debug({ mappedTools: openaiResponse.choices[0].message.tool_calls.map(t => t.function?.name), clientType }, "Tool names mapped for non-streaming chat/completions"); } logger.info({ duration: Date.now() - startTime, mode: "non-streaming", inputTokens: openaiResponse.usage.prompt_tokens, outputTokens: openaiResponse.usage.completion_tokens, finishReason: 
openaiResponse.choices[0].finish_reason }, "=== OPENAI CHAT COMPLETION RESPONSE ==="); res.json(openaiResponse); } } catch (error) { logger.error({ error: error.message, stack: error.stack, duration: Date.now() - startTime }, "OpenAI chat completion error"); // Return OpenAI-format error res.status(500).json({ error: { message: error.message || "Internal server error", type: "server_error", code: "internal_error" } }); } }); /** * Get all configured providers with their models (cc-relay style) * Reads from config (which comes from .env) to discover what's available */ function getConfiguredProviders() { const providers = []; const timestamp = Math.floor(Date.now() / 1000); // Check Databricks if (config.databricks?.url && config.databricks?.apiKey) { providers.push({ name: "databricks", type: "databricks", models: [ "claude-sonnet-4.5", "claude-opus-4.5", config.modelProvider?.defaultModel || "databricks-claude-sonnet-4-5" ] }); } // Check AWS Bedrock if (config.bedrock?.apiKey) { const bedrockModels = [config.bedrock.modelId]; if (config.bedrock.modelId?.includes("claude")) { bedrockModels.push( "anthropic.claude-3-5-sonnet-20241022-v2:0", "anthropic.claude-3-opus-20240229-v1:0", "anthropic.claude-3-haiku-20240307-v1:0" ); } providers.push({ name: "bedrock", type: "aws-bedrock", models: [...new Set(bedrockModels)] }); } // Check Azure Anthropic if (config.azureAnthropic?.endpoint && config.azureAnthropic?.apiKey) { providers.push({ name: "azure-anthropic", type: "azure-anthropic", models: ["claude-3-5-sonnet", "claude-opus-4.5"] }); } // Check Azure OpenAI if (config.azureOpenAI?.endpoint && config.azureOpenAI?.apiKey) { providers.push({ name: "azure-openai", type: "azure-openai", models: [ config.azureOpenAI.deployment || "gpt-4o", "gpt-4o", "gpt-4-turbo", "gpt-4", "gpt-3.5-turbo" ] }); } // Check OpenAI if (config.openai?.apiKey) { providers.push({ name: "openai", type: "openai", models: [ config.openai.model || "gpt-4o", "gpt-4o", "gpt-4o-mini", "gpt-4-turbo" ] 
}); } // Check OpenRouter if (config.openrouter?.apiKey) { providers.push({ name: "openrouter", type: "openrouter", models: [ config.openrouter.model || "openai/gpt-4o-mini", "anthropic/claude-3.5-sonnet", "openai/gpt-4o", "openai/gpt-4o-mini", "nvidia/nemotron-3-nano-30b-a3b:free" ] }); } // Check Ollama if (config.ollama?.endpoint) { providers.push({ name: "ollama", type: "ollama", models: [config.ollama.model || "qwen2.5-coder:7b"] }); } // Check llama.cpp if (config.llamacpp?.endpoint) { providers.push({ name: "llamacpp", type: "llama.cpp", models: [config.llamacpp.model || "default"] }); } // Check LM Studio if (config.lmstudio?.endpoint) { providers.push({ name: "lmstudio", type: "lm-studio", models: [config.lmstudio.model || "default"] }); } // Check Z.AI (Zhipu) if (config.zai?.apiKey) { providers.push({ name: "zai", type: "zhipu-ai", models: [ config.zai.model || "GLM-4.7", "GLM-4.7", "GLM-4.5-Air", "GLM-4-Plus" ] }); } // Check Moonshot AI (Kimi) if (config.moonshot?.apiKey) { providers.push({ name: "moonshot", type: "moonshot-ai", models: [ config.moonshot.model || "kimi-k2-turbo-preview", "kimi-k2-turbo-preview" ] }); } // Check Vertex AI (Google Cloud) if (config.vertex?.projectId) { providers.push({ name: "vertex", type: "google-vertex-ai", models: [ config.vertex.model || "claude-sonnet-4-5@20250514", "claude-sonnet-4-5@20250514", "claude-opus-4-5@20250514", "claude-haiku-4-5@20251001" ] }); } return providers; } /** * GET /v1/models * * List available models from ALL configured providers (cc-relay style). * Returns OpenAI-compatible model list with provider field. 
*/ router.get("/models", (req, res) => { try { const providers = getConfiguredProviders(); const primaryProvider = config.modelProvider?.type || "databricks"; const timestamp = Math.floor(Date.now() / 1000); const models = []; const seenModelIds = new Set(); // Collect models from all providers for (const provider of providers) { for (const modelId of provider.models) { // Create unique key to avoid duplicates const uniqueKey = `${provider.name}:${modelId}`; if (seenModelIds.has(uniqueKey)) continue; seenModelIds.add(uniqueKey); models.push({ id: modelId, object: "model", created: timestamp, owned_by: provider.type, provider: provider.name, // cc-relay style: include provider name permission: [], root: modelId, parent: null }); } } // Add embedding models if embeddings are configured const embeddingConfig = determineEmbeddingProvider(); if (embeddingConfig) { let embeddingModelId; switch (embeddingConfig.provider) { case "llamacpp": embeddingModelId = "text-embedding-3-small"; break; case "ollama": embeddingModelId = embeddingConfig.model; break; case "openrouter": embeddingModelId = embeddingConfig.model; break; case "openai": embeddingModelId = embeddingConfig.model || "text-embedding-ada-002"; break; default: embeddingModelId = "text-embedding-3-small"; } const uniqueKey = `${embeddingConfig.provider}:${embeddingModelId}`; if (!seenModelIds.has(uniqueKey)) { models.push({ id: embeddingModelId, object: "model", created: timestamp, owned_by: embeddingConfig.provider, provider: embeddingConfig.provider, permission: [], root: embeddingModelId, parent: null }); } } logger.debug({ providerCount: providers.length, modelCount: models.length, models: models.map(m => ({ id: m.id, provider: m.provider })), hasEmbeddings: !!embeddingConfig }, "Listed models for OpenAI API (cc-relay style)"); res.json({ object: "list", data: models }); } catch (error) { logger.error({ error: error.message }, "Error listing models"); res.status(500).json({ error: { message: error.message || 
"Failed to list models", type: "server_error", code: "internal_error" } }); } }); /** * Determine which provider to use for embeddings * Priority: * 1. Explicit EMBEDDINGS_PROVIDER env var * 2. Same provider as MODEL_PROVIDER (if it supports embeddings) * 3. First available: OpenRouter > OpenAI > Ollama > llama.cpp */ function determineEmbeddingProvider(requestedModel = null) { const explicitProvider = process.env.EMBEDDINGS_PROVIDER?.trim(); // Priority 1: Explicit configuration if (explicitProvider) { switch (explicitProvider) { case "ollama": if (!config.ollama?.embeddingsModel) { logger.warn("EMBEDDINGS_PROVIDER=ollama but OLLAMA_EMBEDDINGS_MODEL not set"); return null; } return { provider: "ollama", model: requestedModel || config.ollama.embeddingsModel, endpoint: config.ollama.embeddingsEndpoint }; case "llamacpp": if (!config.llamacpp?.embeddingsEndpoint) { logger.warn("EMBEDDINGS_PROVIDER=llamacpp but LLAMACPP_EMBEDDINGS_ENDPOINT not set"); return null; } return { provider: "llamacpp", model: requestedModel || "default", endpoint: config.llamacpp.embeddingsEndpoint }; case "openrouter": if (!config.openrouter?.apiKey) { logger.warn("EMBEDDINGS_PROVIDER=openrouter but OPENROUTER_API_KEY not set"); return null; } return { provider: "openrouter", model: requestedModel || config.openrouter.embeddingsModel, apiKey: config.openrouter.apiKey, endpoint: "https://openrouter.ai/api/v1/embeddings" }; case "openai": if (!config.openai?.apiKey) { logger.warn("EMBEDDINGS_PROVIDER=openai but OPENAI_API_KEY not set"); return null; } return { provider: "openai", model: requestedModel || "text-embedding-ada-002", apiKey: config.openai.apiKey, endpoint: "https://api.openai.com/v1/embeddings" }; } } // Priority 2: Same as chat provider (if supported) const chatProvider = config.modelProvider?.type; if (chatProvider === "openrouter" && config.openrouter?.apiKey) { return { provider: "openrouter", model: requestedModel || config.openrouter.embeddingsModel, apiKey: 
config.openrouter.apiKey, endpoint: "https://openrouter.ai/api/v1/embeddings" }; } if (chatProvider === "ollama" && config.ollama?.embeddingsModel) { return { provider: "ollama", model: requestedModel || config.ollama.embeddingsModel, endpoint: config.ollama.embeddingsEndpoint }; } if (chatProvider === "llamacpp" && config.llamacpp?.embeddingsEndpoint) { return { provider: "llamacpp", model: requestedModel || "default", endpoint: config.llamacpp.embeddingsEndpoint }; } // Priority 3: First available provider if (config.openrouter?.apiKey) { return { provider: "openrouter", model: requestedModel || config.openrouter.embeddingsModel, apiKey: config.openrouter.apiKey, endpoint: "https://openrouter.ai/api/v1/embeddings" }; } if (config.openai?.apiKey) { return { provider: "openai", model: requestedModel || "text-embedding-ada-002", apiKey: config.openai.apiKey, endpoint: "https://api.openai.com/v1/embeddings" }; } if (config.ollama?.embeddingsModel) { return { provider: "ollama", model: requestedModel || config.ollama.embeddingsModel, endpoint: config.ollama.embeddingsEndpoint }; } if (config.llamacpp?.embeddingsEndpoint) { return { provider: "llamacpp", model: requestedModel || "default", endpoint: config.llamacpp.embeddingsEndpoint }; } return null; // No provider available } /** * Generate embeddings using Ollama * Note: Ollama only supports single prompt, not batch */ async function generateOllamaEmbeddings(inputs, embeddingConfig) { const { model, endpoint } = embeddingConfig; logger.debug({ model, endpoint, inputCount: inputs.length }, "Generating embeddings with Ollama"); // Ollama doesn't support batch, so we need to process one by one const embeddings = []; for (let i = 0; i < inputs.length; i++) { const input = inputs[i]; try { const response = await fetch(endpoint, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ model: model, prompt: input }) }); if (!response.ok) { const errorText = await response.text(); throw new 
Error(`Ollama embeddings error (${response.status}): ${errorText}`); } const data = await response.json(); embeddings.push({ object: "embedding", embedding: data.embedding, index: i }); } catch (error) { logger.error({ error: error.message, input: input.substring(0, 100), index: i }, "Failed to generate Ollama embedding"); throw error; } } // Convert to OpenAI format return { object: "list", data: embeddings, model: model, usage: { prompt_tokens: 0, // Ollama doesn't provide this total_tokens: 0 } }; } /** * Generate embeddings using llama.cpp * llama.cpp uses OpenAI-compatible format, so minimal conversion needed */ async function generateLlamaCppEmbeddings(inputs, embeddingConfig) { const { model, endpoint } = embeddingConfig; logger.debug({ model, endpoint, inputCount: inputs.length }, "Generating embeddings with llama.cpp"); try { const response = await fetch(endpoint, { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ input: inputs, // llama.cpp supports batch encoding_format: "float" }) }); if (!response.ok) { const errorText = await response.text(); throw new Error(`llama.cpp embeddings error (${response.status}): ${errorText}`); } const data = await response.json(); // llama.cpp returns array format: [{index: 0, embedding: [[...]]}] // Need to convert to OpenAI format: {data: [{object: "embedding", embedding: [...], index: 0}]} let embeddingsData; if (Array.isArray(data)) { // llama.cpp returns array directly embeddingsData = data.map(item => ({ object: "embedding", embedding: Array.isArray(item.embedding[0]) ? 
item.embedding[0] : item.embedding, // Flatten double-nested array index: item.index })); } else if (data.data) { // Already in OpenAI format embeddingsData = data.data; } else { embeddingsData = []; } return { object: "list", data: embeddingsData, model: model || data.model || "default", usage: data.usage || { prompt_tokens: 0, total_tokens: 0 } }; } catch (error) { logger.error({ error: error.message, endpoint }, "Failed to generate llama.cpp embeddings"); throw error; } } /** * Generate embeddings using OpenRouter */ async function generateOpenRouterEmbeddings(inputs, embeddingConfig) { const { model, apiKey, endpoint } = embeddingConfig; logger.debug({ model, inputCount: inputs.length }, "Generating embeddings with OpenRouter"); const response = await fetch(endpoint, { method: "POST", headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}`, "HTTP-Referer": "https://github.com/vishalveerareddy123/Lynkr", "X-Title": "Lynkr" }, body: JSON.stringify({ model: model, input: inputs, encoding_format: "float" }) }); if (!response.ok) { const errorText = await response.text(); throw new Error(`OpenRouter embeddings error (${response.status}): ${errorText}`); } return await response.json(); } /** * Generate embeddings using OpenAI */ async function generateOpenAIEmbeddings(inputs, embeddingConfig) { const { model, apiKey, endpoint } = embeddingConfig; logger.debug({ model, inputCount: inputs.length }, "Generating embeddings with OpenAI"); const response = await fetch(endpoint, { method: "POST", headers: { "Content-Type": "application/json", "Authorization": `Bearer ${apiKey}` }, body: JSON.stringify({ model: model, input: inputs, encoding_format: "float" }) }); if (!response.ok) { const errorText = await response.text(); throw new Error(`OpenAI embeddings error (${response.status}): ${errorText}`); } return await response.json(); } /** * POST /v1/embeddings * * Generate embeddings using configured provider (Ollama, llama.cpp, OpenRouter, or 
/**
 * POST /v1/embeddings
 *
 * Generate embeddings using the configured provider (Ollama, llama.cpp,
 * OpenRouter, or OpenAI) and return the provider result as JSON.
 * Required for Cursor's semantic search features.
 *
 * Responses:
 *  - 400 when `input` is missing or empty
 *  - 501 when no embedding provider is configured
 *  - 500 when the provider call fails or an unexpected error occurs
 */
router.post("/embeddings", async (req, res) => {
  const startTime = Date.now();

  try {
    // Guard against an absent body so the request is answered with a 400
    // below instead of a TypeError-driven 500.
    const { input, model } = req.body ?? {};

    // Validate input: reject missing values and empty arrays, which would
    // otherwise be forwarded to the provider with nothing to embed.
    const isEmptyArray = Array.isArray(input) && input.length === 0;
    if (!input || isEmptyArray) {
      return res.status(400).json({
        error: {
          message: "Missing required parameter: input",
          type: "invalid_request_error",
          code: "missing_parameter"
        }
      });
    }

    // Convert input to array if string
    const inputs = Array.isArray(input) ? input : [input];

    logger.debug({
      endpoint: "/v1/embeddings",
      model: model || "auto-detect",
      inputCount: inputs.length,
      // Optional chaining keeps a null/undefined item from crashing the
      // request just because of debug logging.
      inputLengths: inputs.map(i => i?.length)
    }, "=== OPENAI EMBEDDINGS REQUEST ===");

    // Determine which provider to use for embeddings
    const embeddingConfig = determineEmbeddingProvider(model);

    if (!embeddingConfig) {
      logger.warn("No embedding provider configured");
      return res.status(501).json({
        error: {
          message: "Embeddings not configured. Set up one of: OPENROUTER_API_KEY, OPENAI_API_KEY, OLLAMA_EMBEDDINGS_MODEL, or LLAMACPP_EMBEDDINGS_ENDPOINT in your .env file to enable @Codebase semantic search.",
          type: "not_implemented",
          code: "embeddings_not_configured"
        }
      });
    }

    // Route to appropriate provider
    let embeddingResponse;

    try {
      switch (embeddingConfig.provider) {
        case "ollama":
          embeddingResponse = await generateOllamaEmbeddings(inputs, embeddingConfig);
          break;
        case "llamacpp":
          embeddingResponse = await generateLlamaCppEmbeddings(inputs, embeddingConfig);
          break;
        case "openrouter":
          embeddingResponse = await generateOpenRouterEmbeddings(inputs, embeddingConfig);
          break;
        case "openai":
          embeddingResponse = await generateOpenAIEmbeddings(inputs, embeddingConfig);
          break;
        default:
          throw new Error(`Unsupported embedding provider: ${embeddingConfig.provider}`);
      }
    } catch (error) {
      // Provider failures are reported separately from unexpected handler
      // errors so the log records which provider failed.
      logger.error({
        error: error.message,
        provider: embeddingConfig.provider,
      }, "Embeddings generation failed");

      return res.status(500).json({
        error: {
          message: error.message || "Embeddings generation failed",
          type: "server_error",
          code: "embeddings_error"
        }
      });
    }

    logger.info({
      provider: embeddingConfig.provider,
      model: embeddingConfig.model,
      duration: Date.now() - startTime,
      embeddingCount: embeddingResponse.data?.length || 0,
      totalTokens: embeddingResponse.usage?.total_tokens || 0
    }, "=== EMBEDDINGS RESPONSE ===");

    // Return embeddings in OpenAI format
    res.json(embeddingResponse);

  } catch (error) {
    logger.error({
      error: error.message,
      stack: error.stack,
      duration: Date.now() - startTime
    }, "Embeddings error");

    res.status(500).json({
      error: {
        message: error.message || "Internal server error",
        type: "server_error",
        code: "internal_error"
      }
    });
  }
});
req.body.input.map(m => ({role: m?.role, hasContent: !!m?.content, hasTool: !!m?.tool_calls})) : 'unknown', model: req.body.model, hasTools: !!req.body.tools, stream: req.body.stream || false, fullRequestBodyKeys: Object.keys(req.body) }, "=== RESPONSES API REQUEST ==="); // Resolve previous_response_id for session continuity if (req.body.previous_response_id) { const responseStore = require("../stores/response-store"); const prev = responseStore.getResponse(req.body.previous_response_id); if (prev && Array.isArray(prev.messages)) { const prevContext = [...prev.messages]; if (prev.assistantContent) { prevContext.push({ role: "assistant", content: prev.assistantContent }); } if (Array.isArray(req.body.input)) { req.body.input = [...prevContext, ...req.body.input]; } else if (typeof req.body.input === "string") { req.body.input = [...prevContext, { role: "user", content: req.body.input }]; } logger.debug({ previousId: req.body.previous_response_id, prependedMessages: prevContext.length, }, "Resolved previous_response_id"); } else { logger.warn({ previousId: req.body.previous_response_id }, "previous_response_id not found"); } } // Convert Responses API to Chat Completions format const chatRequest = convertResponsesToChat(req.body); logger.debug({ chatRequestMessageCount: chatRequest.messages?.length, chatRequestMessages: chatRequest.messages?.map(m => ({ role: m.role, hasContent: !!m.content, contentPreview: typeof m.content === 'string' ? m.content.substring(0, 50) : m.content })) }, "After Responses→Chat conversion"); // Convert to Anthropic format const anthropicRequest = convertOpenAIToAnthropic(chatRequest); // Normalize tool_use names in conversation history to client format. // Tool definitions are injected with client names (e.g., "shell", "read_file"), // so tool_use blocks must also use client names to satisfy the Anthropic API // requirement that tool_use names match tool definitions. 
const clientType = detectClient(req.headers); const clientMappings = CLIENT_TOOL_MAPPINGS[clientType]; if (clientMappings && Array.isArray(anthropicRequest.messages)) { const lynkrToClient = {}; for (const [lynkrName, mapping] of Object.entries(clientMappings)) { lynkrToClient[lynkrName] = mapping.name; } for (const msg of anthropicRequest.messages) { if (!Array.isArray(msg.content)) continue; for (const block of msg.content) { if (block.type === 'tool_use' && lynkrToClient[block.name]) { block.name = lynkrToClient[block.name]; } } } } logger.debug({ anthropicMessageCount: anthropicRequest.messages?.length, anthropicMessages: anthropicRequest.messages?.map(m => ({ role: m.role, hasContent: !!m.content })) }, "After Chat→Anthropic conversion"); // Inject tools if the Anthropic request has none. // The client may have sent tools in Responses API format (top-level name) // which convertOpenAIToAnthropic silently drops because it expects Chat // Completions format ({function: {name}}). Always check the CONVERTED // result, not the raw request. const clientDisabledToolChoice = req.body.tool_choice === "none"; if (!clientDisabledToolChoice && (!anthropicRequest.tools || anthropicRequest.tools.length === 0)) { // Exclude server-side-only tools (Task, web_search, etc.) from the // Responses endpoint — they can't be executed by external clients like // Codex and would be converted to broken shell echo commands. const RESPONSES_EXCLUDED_TOOLS = new Set(["Task", "AskUserQuestion", "TodoWrite", "WebSearch", "WebFetch", "WebAgent"]); const clientMappings = CLIENT_TOOL_MAPPINGS[clientType]; let clientTools = (clientMappings ? 
IDE_SAFE_TOOLS.filter(t => clientMappings[t.name]) : IDE_SAFE_TOOLS ).filter(t => !RESPONSES_EXCLUDED_TOOLS.has(t.name)); // Rename tools to client-expected names so the model uses the right names // e.g., for Codex: "Read" → "read_file", "Bash" → "shell" // The lynkrToClient map above normalizes any stale Lynkr names in history if (clientMappings) { clientTools = clientTools.map(t => { const mapping = clientMappings[t.name]; if (!mapping) return t; return { ...t, name: mapping.name, description: t.description || '', }; }); } anthropicRequest.tools = clientTools; logger.info({ clientType, injectedToolCount: clientTools.length, injectedToolNames: clientTools.map(t => t.name), reason: clientMappings ? `Known client '${clientType}' — tools renamed to client conventions` : "Unknown client — injecting full IDE_SAFE_TOOLS" }, "=== INJECTING TOOLS (responses) ==="); } else { logger.info({ clientType, clientDisabledToolChoice, hasTools: !!anthropicRequest.tools, toolCount: anthropicRequest.tools?.length || 0, toolNames: anthropicRequest.tools?.map(t => t.name)?.slice(0, 10), reqToolChoice: req.body.tool_choice, reqToolsIsArray: Array.isArray(req.body.tools), reqToolsLength: req.body.tools?.length, }, "=== TOOLS NOT INJECTED (responses) ==="); } // ALWAYS strip server-side-only tools from the Responses endpoint. // These can't be executed by external clients (Codex, etc.) and cause // infinite retry loops when the model keeps calling them. 
const RESPONSES_EXCLUDED = new Set(["Task", "AskUserQuestion", "TodoWrite", "WebSearch", "WebFetch", "WebAgent"]); if (Array.isArray(anthropicRequest.tools)) { anthropicRequest.tools = anthropicRequest.tools.filter(t => !RESPONSES_EXCLUDED.has(t.name)); } // Snapshot tool names before the orchestrator can mutate them const injectedToolNames = new Set( (anthropicRequest.tools || []).map(t => t.name) ); // Get session const session = getSession(sessionId); // Handle streaming vs non-streaming if (req.body.stream) { // Set up SSE headers for streaming res.setHeader("Content-Type", "text/event-stream"); res.setHeader("Cache-Control", "no-cache"); res.setHeader("Connection", "keep-alive"); res.setHeader("X-Accel-Buffering", "no"); res.flushHeaders(); try { anthropicRequest.stream = false; // SSE comment keepalive (spec-compliant, ignored by all clients) const keepalive = setInterval(() => { try { res.write(`: keepalive\n\n`); } catch {} }, 2000); let result; try { result = await orchestrator.processMessage({ payload: anthropicRequest, headers: req.headers, session: session, options: { maxSteps: req.body?.max_steps } }); } finally { clearInterval(keepalive); } logger.debug({ hasResult: !!result, hasBody: !!result?.body, bodyContentLength: result?.body?.content?.length || 0, terminationReason: result?.terminationReason }, "=== ORCHESTRATOR RESULT FOR RESPONSES API ==="); // Convert back: Anthropic → OpenAI → Responses const responsesModel = resolveResponseModel(result.body, req.body.model); // Guard: if orchestrator returned an error body, surface it as text if (result.body?.error || result.status >= 400) { const errMsg = result.body?.error?.message || result.body?.error || JSON.stringify(result.body); logger