UNPKG

lynkr

Version:

Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.

1,007 lines (901 loc) 35.5 kB
const express = require("express");
const config = require("../config");
const { processMessage } = require("../orchestrator");
const { getSession } = require("../sessions");
const metrics = require("../metrics");
const logger = require("../logger");
const { createRateLimiter } = require("./middleware/rate-limiter");
const openaiRouter = require("./openai-router");
const providersRouter = require("./providers-handler");
const {
  getRoutingHeaders,
  getRoutingStats,
  analyzeComplexity,
  getModelTierSelector,
  analyzeRisk,
} = require("../routing");
const { buildInteractionBlock } = require("../routing/interaction");
const { validateCwd } = require("../workspace");
const { renderText } = require("../utils/markdown-ansi");

const router = express.Router();

// Create rate limiter middleware
const rateLimiter = createRateLimiter();

/**
 * Estimate token count for messages.
 *
 * Phase 1.1: tiktoken-backed via routing/tokenizer (graceful fallback to chars/4
 * if js-tiktoken is unavailable).
 */
const { countMessagesTokens } = require("../routing/tokenizer");

/**
 * Thin wrapper around the tokenizer's `countMessagesTokens`.
 *
 * @param {Array} [messages=[]] - Messages array from the request body.
 * @param {*} [system=null] - Optional system prompt, forwarded unchanged.
 * @param {?string} [model=null] - Optional model name, forwarded unchanged.
 * @returns {*} Whatever `countMessagesTokens` returns for these arguments.
 */
function estimateTokenCount(messages = [], system = null, model = null) {
  return countMessagesTokens(messages, system, model);
}

// ---------------------------------------------------------------------------
// Shared helpers
// ---------------------------------------------------------------------------

/** Usage window used by GET /v1/usage when neither `window` nor a valid `days` is given. */
const DEFAULT_USAGE_WINDOW = "30d";

/** Number of log lines returned by GET /headroom/logs when `tail` is absent or invalid. */
const DEFAULT_LOG_TAIL = 100;

/** tool_use blocks with these (lower-cased) names are never forwarded to the client. */
const SERVER_SIDE_TOOL_NAMES = new Set([
  "task",
  "websearch",
  "webfetch",
  "web_search",
  "web_fetch",
  "web_agent",
]);

/** Tool names whose input carries file content that can be surfaced as an <artifact>. */
const WRITE_STYLE_TOOL_NAMES = new Set(["write", "create_file", "write_file", "str_replace_editor"]);

/** Maximum length (UTF-16 code units) of one streamed thinking_delta chunk. */
const THINKING_CHUNK_SIZE = 40;

/**
 * Parse a base-10 integer, returning `fallback` when the value does not parse.
 *
 * @param {*} value - Raw value (typically a query-string parameter).
 * @param {number} fallback - Value returned when parsing yields NaN.
 * @returns {number}
 */
function parseIntOr(value, fallback) {
  const parsed = Number.parseInt(value, 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

/**
 * Resolve the usage window for GET /v1/usage.
 *
 * `window` wins when present; otherwise `days` is turned into "<n>d". A `days`
 * value that does not parse falls back to the default instead of producing
 * the string "NaNd".
 *
 * @param {object} query - `req.query`.
 * @returns {string}
 */
function resolveUsageWindow(query) {
  if (query.window) {
    return query.window;
  }
  const days = query.days ? Number.parseInt(query.days, 10) : Number.NaN;
  return Number.isNaN(days) ? DEFAULT_USAGE_WINDOW : `${days}d`;
}

/**
 * Copy every defined entry of `headers` onto the response.
 *
 * @param {object} res - Express response.
 * @param {object} headers - Header name → value map; undefined values are skipped.
 */
function applyHeaders(res, headers) {
  for (const [key, value] of Object.entries(headers)) {
    if (value !== undefined) {
      res.setHeader(key, value);
    }
  }
}

/**
 * Convert Anthropic server tools (web_search_20260209, etc.) to regular
 * function tools so non-Anthropic providers can execute them via Lynkr.
 * Tools of any other type are returned unchanged.
 *
 * @param {object} tool - One entry of the request's `tools` array.
 * @returns {object} The converted tool definition, or the original tool.
 */
function convertServerTool(tool) {
  if (tool?.type?.startsWith?.("web_search_20")) {
    logger.info(
      { originalType: tool.type, name: tool.name },
      "Converting web_search server tool to function tool",
    );
    return {
      name: tool.name || "web_search",
      description:
        "Search the web for up-to-date information. Returns relevant search results from the web.",
      input_schema: {
        type: "object",
        properties: {
          query: { type: "string", description: "Search query" },
        },
        required: ["query"],
      },
    };
  }
  if (tool?.type?.startsWith?.("web_fetch_")) {
    return {
      name: tool.name || "web_fetch",
      description: "Fetch the contents of a URL.",
      input_schema: {
        type: "object",
        properties: {
          url: { type: "string", description: "URL to fetch" },
        },
        required: ["url"],
      },
    };
  }
  return tool;
}

/**
 * Build the best-effort pre-route decision used for routing headers and the
 * interaction block.
 *
 * High risk forces the COMPLEX tier; otherwise a "local" complexity
 * recommendation selects the SIMPLE tier. Every lookup is best-effort: a
 * failure leaves the defaults in place (or falls back to "ollama" for the
 * local case) rather than failing the request.
 *
 * @param {object} body - Request body.
 * @param {object} complexity - Result of `analyzeComplexity(body)`.
 * @returns {object} Decision with provider, tier, model, method, reason, score, threshold, risk.
 */
function buildPreRouteDecision(body, complexity) {
  let risk = null;
  try {
    risk = analyzeRisk(body);
  } catch (err) {
    logger.debug({ err: err.message }, "[Router] Risk analysis failed in pre-route");
  }

  let provider = "cloud";
  let tier = null;
  let model = null;
  let method = "complexity";
  let reason = complexity.breakdown?.taskType?.reason || complexity.recommendation;

  if (risk?.level === "high") {
    try {
      const tierResult = getModelTierSelector().selectModel("COMPLEX", null);
      provider = tierResult.provider;
      tier = "COMPLEX";
      model = tierResult.model;
      method = "risk";
      reason = "high_risk_forced_tier";
    } catch (err) {
      // Risk-forced tier not configured; fall back to normal flow.
      logger.debug({ err: err?.message }, "[Router] Risk-forced tier unavailable in pre-route");
    }
  }

  if (!tier && complexity.recommendation === "local") {
    try {
      const tierResult = getModelTierSelector().selectModel("SIMPLE", null);
      provider = tierResult.provider;
      tier = "SIMPLE";
      model = tierResult.model;
    } catch (err) {
      logger.debug({ err: err?.message }, "[Router] SIMPLE tier unavailable in pre-route");
      provider = "ollama";
    }
  }

  return {
    provider,
    tier,
    model,
    method,
    reason,
    score: complexity.score,
    threshold: complexity.threshold,
    risk,
  };
}

/**
 * Write one Server-Sent Event. The SSE event name is the payload's `type`.
 *
 * @param {object} res - Express response already in text/event-stream mode.
 * @param {object} payload - Event payload; must carry a `type` field.
 */
function writeSseEvent(res, payload) {
  res.write(`event: ${payload.type}\n`);
  res.write(`data: ${JSON.stringify(payload)}\n\n`);
}

/**
 * Split a string into chunks of at most `size` UTF-16 code units without
 * cutting a surrogate pair in half (a chunk may be one unit longer to keep a
 * pair together).
 *
 * @param {string} text
 * @param {number} size
 * @returns {string[]}
 */
function chunkString(text, size) {
  const chunks = [];
  let start = 0;
  while (start < text.length) {
    let end = Math.min(start + size, text.length);
    const lastUnit = text.charCodeAt(end - 1);
    if (end < text.length && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
      end += 1;
    }
    chunks.push(text.slice(start, end));
    start = end;
  }
  return chunks;
}

/**
 * Emit a complete text content block (start, deltas, stop).
 *
 * Text is sent as one chunk when ANSI rendering is active (splitting ANSI
 * escape sequences across chunks breaks terminal output). Plain text is sent
 * line by line for a trickle effect; the chunks concatenate back to exactly
 * the block text.
 *
 * ANSI rendering is never applied to HTML content (<artifact> blocks): ANSI
 * codes corrupt CSS selectors like `*` and break the browser viewer.
 *
 * @param {object} res - Express response.
 * @param {number} index - Content block index.
 * @param {string} rawText - Block text as returned by the provider.
 */
function writeTextBlock(res, index, rawText) {
  const isHtmlContent = rawText.includes("<artifact") || rawText.trimStart().startsWith("<");
  const text = isHtmlContent ? rawText : renderText(rawText);
  // Read per call, as the original code did, rather than capturing it at module load.
  const { enabled: ansiEnabled } = require("../utils/markdown-ansi");

  const writeDelta = (chunk) => {
    if (chunk.length > 0) {
      writeSseEvent(res, {
        type: "content_block_delta",
        index,
        delta: { type: "text_delta", text: chunk },
      });
    }
  };

  writeSseEvent(res, {
    type: "content_block_start",
    index,
    content_block: { type: "text", text: "" },
  });

  if (ansiEnabled && !isHtmlContent) {
    writeDelta(text);
  } else {
    const lines = text.split("\n");
    lines.forEach((line, lineIndex) => {
      // Re-attach the separator to every line except the last so no extra
      // trailing newline is added to the reassembled text.
      const isLastLine = lineIndex === lines.length - 1;
      writeDelta(isLastLine ? line : `${line}\n`);
    });
  }

  writeSseEvent(res, { type: "content_block_stop", index });
}

/**
 * Emit a complete thinking content block, chunking the thinking text.
 *
 * @param {object} res - Express response.
 * @param {number} index - Content block index.
 * @param {string} thinkingText - Thinking text of the block.
 */
function writeThinkingBlock(res, index, thinkingText) {
  writeSseEvent(res, {
    type: "content_block_start",
    index,
    content_block: { type: "thinking", thinking: "" },
  });
  for (const chunk of chunkString(thinkingText, THINKING_CHUNK_SIZE)) {
    writeSseEvent(res, {
      type: "content_block_delta",
      index,
      delta: { type: "thinking_delta", thinking: chunk },
    });
  }
  writeSseEvent(res, { type: "content_block_stop", index });
}

/**
 * Emit a complete tool_use content block with its input as a single
 * input_json_delta.
 *
 * @param {object} res - Express response.
 * @param {number} index - Content block index.
 * @param {object} block - tool_use block (`id`, `name`, `input`).
 */
function writeToolUseBlock(res, index, block) {
  writeSseEvent(res, {
    type: "content_block_start",
    index,
    content_block: { type: "tool_use", id: block.id, name: block.name, input: {} },
  });
  writeSseEvent(res, {
    type: "content_block_delta",
    index,
    // JSON.stringify(undefined) is undefined, which would drop partial_json
    // from the event entirely; default a missing input to an empty object.
    delta: { type: "input_json_delta", partial_json: JSON.stringify(block.input ?? {}) },
  });
  writeSseEvent(res, { type: "content_block_stop", index });
}

/**
 * Handle a tool_use block returned for a request that declared no tools
 * (the model hallucinated a tool call).
 *
 * For write-style tools the file content is extracted and wrapped in an
 * <artifact> block so open-design routes it to the Design panel. Any other
 * tool_use, or a write with empty content, is silently dropped.
 *
 * @param {object} res - Express response.
 * @param {number} index - Content block index to use if something is emitted.
 * @param {object} block - tool_use block.
 * @returns {boolean} True when a text block was emitted, false when dropped.
 */
function writeArtifactBlock(res, index, block) {
  const toolName = (block.name || "").toLowerCase();
  if (!WRITE_STYLE_TOOL_NAMES.has(toolName)) {
    return false;
  }

  const rawContent =
    block.input?.content ?? block.input?.file_content ?? block.input?.new_content ?? "";
  const content = String(rawContent);
  if (!content) {
    return false;
  }

  const filePath = String(block.input?.file_path ?? block.input?.filename ?? "design.html");
  const identifier = filePath.replace(/[^a-zA-Z0-9._-]/g, "_");
  // The path comes from model output; a double quote would terminate the
  // attribute early and break the artifact markup.
  const title = filePath.replace(/"/g, "'");
  const wrapped = `<artifact identifier="${identifier}" type="text/html" title="${title}">\n${content}\n</artifact>`;

  writeSseEvent(res, {
    type: "content_block_start",
    index,
    content_block: { type: "text", text: "" },
  });
  writeSseEvent(res, {
    type: "content_block_delta",
    index,
    delta: { type: "text_delta", text: wrapped },
  });
  writeSseEvent(res, { type: "content_block_stop", index });
  return true;
}

/**
 * Replay a fully buffered message as an Anthropic-style SSE sequence:
 * message_start, one start/delta/stop group per content block, message_delta,
 * message_stop. Does not end the response.
 *
 * Server-side tool_use blocks are filtered out. Block indices are assigned
 * from a running counter so they stay contiguous even when blocks are dropped
 * or have an unsupported type.
 *
 * @param {object} res - Express response already in text/event-stream mode.
 * @param {object} msg - Buffered message (`id`, `model`, `content`, `usage`, `stop_reason`).
 * @param {object} options
 * @param {boolean} options.forwardToolUse - True to forward tool_use blocks as
 *   tool_use events (request declared tools); false to convert write-style
 *   tool calls into <artifact> text and drop the rest.
 */
function writeMessageAsSse(res, msg, { forwardToolUse }) {
  // 1. message_start
  writeSseEvent(res, {
    type: "message_start",
    message: {
      id: msg.id,
      type: "message",
      role: "assistant",
      content: [],
      model: msg.model,
      stop_reason: null,
      stop_sequence: null,
      usage: { input_tokens: msg.usage?.input_tokens || 0, output_tokens: 1 },
    },
  });

  // 2. content_block_start / delta / stop for each content block.
  // Filter out server-side tools that shouldn't reach the client.
  const contentBlocks = (Array.isArray(msg.content) ? msg.content : []).filter(
    (block) =>
      !(block.type === "tool_use" && SERVER_SIDE_TOOL_NAMES.has((block.name || "").toLowerCase())),
  );

  let index = 0;
  for (const block of contentBlocks) {
    let emitted = true;
    if (block.type === "text") {
      writeTextBlock(res, index, block.text || "");
    } else if (block.type === "thinking") {
      writeThinkingBlock(res, index, block.thinking || "");
    } else if (block.type === "tool_use" && forwardToolUse) {
      writeToolUseBlock(res, index, block);
    } else if (block.type === "tool_use") {
      emitted = writeArtifactBlock(res, index, block);
    } else {
      emitted = false;
    }
    if (emitted) {
      index += 1;
    }
  }

  // 3. message_delta with stop_reason
  writeSseEvent(res, {
    type: "message_delta",
    delta: { stop_reason: msg.stop_reason || "end_turn", stop_sequence: null },
    usage: { output_tokens: msg.usage?.output_tokens || 0 },
  });

  // 4. message_stop
  writeSseEvent(res, { type: "message_stop" });
}

/**
 * Report an empty provider response on an already-open SSE response and end it.
 *
 * @param {object} res - Express response already in text/event-stream mode.
 */
function endWithEmptyResponseError(res) {
  writeSseEvent(res, { type: "error", error: { message: "Empty response from provider" } });
  res.end();
}

/**
 * Forward a provider stream to the client line by line, then end the response.
 *
 * On success a 200 is recorded in metrics. On a read/write failure the error
 * is logged, the reader is cancelled and the response is closed. The reader
 * lock is always released.
 *
 * @param {object} res - Express response already in text/event-stream mode.
 * @param {object} stream - Provider stream exposing `getReader()`.
 * @returns {Promise<void>}
 */
async function forwardProviderStream(res, stream) {
  const reader = stream.getReader();
  const decoder = new TextDecoder();
  let pending = "";

  // NOTE(review): blank lines are dropped here, yet a blank line is what
  // terminates an event in the SSE wire format. Behavior is kept as-is;
  // confirm that consumers of this pass-through tolerate it.
  const writeCompleteLines = (lines) => {
    for (const line of lines) {
      if (line.trim()) {
        res.write(`${line}\n`);
      }
    }
  };

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;

      pending += decoder.decode(value, { stream: true });
      const lines = pending.split("\n");
      // Keep the last (possibly incomplete) line for the next chunk.
      pending = lines.pop() ?? "";
      writeCompleteLines(lines);

      // Flush after each chunk
      if (typeof res.flush === "function") {
        res.flush();
      }
    }

    // Flush bytes the decoder is still holding, then send what is left.
    pending += decoder.decode();
    writeCompleteLines([pending]);

    metrics.recordResponse(200);
    res.end();
  } catch (streamError) {
    logger.error({ error: streamError }, "Error streaming response");

    // Cancel stream on error
    try {
      await reader.cancel();
    } catch (cancelError) {
      logger.debug({ error: cancelError }, "Failed to cancel stream");
    }

    if (!res.headersSent) {
      res.status(500).json({ error: "Streaming error" });
    } else {
      res.end();
    }
  } finally {
    // CRITICAL: Always release lock
    try {
      reader.releaseLock();
    } catch (releaseError) {
      // Lock may already be released, ignore
      logger.debug({ error: releaseError }, "Stream lock already released");
    }
  }
}

/**
 * Build the text banner shown when visible routing is enabled.
 *
 * @param {object} interaction - Interaction block from `buildInteractionBlock`.
 * @returns {string}
 */
function renderInteractionBanner(interaction) {
  return [
    `╭─ Lynkr ${"─".repeat(40)}`,
    `│ Tier ${interaction.tier ?? "—"} → ${interaction.model ?? "—"} (${interaction.provider ?? "—"})`,
    `│ Score ${interaction.complexity_score ?? "—"}/100 · Risk: ${interaction.risk ?? "—"} · Savings: ~${interaction.estimated_savings_percent ?? 0}%`,
    `│ Route ${interaction.mode ?? "—"} — ${interaction.headline ?? ""}`,
    `╰${"─".repeat(46)}`,
  ].join("\n");
}

/**
 * Return `body` with the interaction block attached, when the body is an
 * Anthropic-style message (`type === "message"`); otherwise return `body`
 * unchanged.
 *
 * Serialized bodies (string / Buffer holding a JSON object) come back as a
 * JSON string. Object bodies come back as a shallow copy, so the caller's
 * object is never mutated. Any parse failure leaves the body untouched.
 *
 * @param {*} body - Response body from the orchestrator.
 * @param {object} interaction - Interaction block to attach.
 * @returns {*} Annotated body, or the original body.
 */
function injectInteraction(body, interaction) {
  try {
    const isSerialized = Buffer.isBuffer(body) || typeof body === "string";
    let message;
    if (isSerialized) {
      const text = Buffer.isBuffer(body) ? body.toString("utf8") : body;
      if (!text.startsWith("{")) {
        return body;
      }
      message = JSON.parse(text);
    } else if (body !== null && typeof body === "object") {
      message = { ...body };
    } else {
      return body;
    }

    if (!message || typeof message !== "object" || message.type !== "message") {
      return body;
    }

    message.lynkr_interaction = interaction;
    // Inject a visible text block into content so Claude Code renders it.
    if (Array.isArray(message.content)) {
      message.content = [
        { type: "text", text: renderInteractionBanner(interaction) },
        ...message.content,
      ];
    }
    return isSerialized ? JSON.stringify(message) : message;
  } catch (err) {
    logger.debug({ err: err.message }, "[Router] Skipped interaction injection (non-JSON body)");
    return body;
  }
}

// ---------------------------------------------------------------------------
// Health and informational routes
// ---------------------------------------------------------------------------

// Root route - Claude Code health check
router.head("/", (req, res) => {
  res.status(200).end();
});

router.get("/", (req, res) => {
  res.json({
    service: "Lynkr",
    version: require("../../package.json").version,
    status: "running",
  });
});

router.get("/health", (req, res) => {
  res.json({ status: "ok" });
});

// Usage report — same data as `lynkr usage` CLI, served as JSON for
// dashboards / agents / scripts that want to surface spend & savings.
router.get("/v1/usage", (req, res) => {
  try {
    const aggregator = require("../usage/aggregator");
    const usage = aggregator.getUsage({
      window: resolveUsageWindow(req.query),
      flagship: req.query.flagship,
      provider: req.query.provider,
      model: req.query.model,
    });
    res.json(usage);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Routing stats endpoint (Phase 3: Metrics)
router.get("/routing/stats", (req, res) => {
  const stats = getRoutingStats();
  res.json({
    status: "ok",
    stats: stats || { message: "No routing decisions recorded yet" },
  });
});

// Model registry info (from LiteLLM + models.dev APIs)
router.get("/routing/models", async (req, res) => {
  try {
    const { getModelRegistry } = require("../routing/model-registry");
    const registry = await getModelRegistry();
    res.json({
      status: "ok",
      ...registry.getStats(),
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Get specific model info
router.get("/routing/models/:model", async (req, res) => {
  try {
    const { getModelRegistry } = require("../routing/model-registry");
    const registry = await getModelRegistry();
    const model = registry.getModel(req.params.model);
    if (!model || model.source === "default") {
      return res.status(404).json({ error: "Model not found", model: req.params.model });
    }
    res.json({ status: "ok", model: req.params.model, ...model });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Routing tier information
router.get("/routing/tiers", (req, res) => {
  try {
    const { getModelTierSelector } = require("../routing/model-tiers");
    const selector = getModelTierSelector();
    res.json({
      status: "ok",
      ...selector.getTierStats(),
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Cost optimization stats
router.get("/metrics/cost-optimization", (req, res) => {
  try {
    const { getCostOptimizer } = require("../routing/cost-optimizer");
    const optimizer = getCostOptimizer();
    res.json({
      status: "ok",
      ...optimizer.getStats(),
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Request analysis test endpoint
router.post("/routing/analyze", async (req, res) => {
  try {
    const { getAgenticDetector } = require("../routing/agentic-detector");
    const { getModelTierSelector } = require("../routing/model-tiers");
    const { getModelRegistry } = require("../routing/model-registry");

    const analysis = await analyzeComplexity(req.body, {
      weighted: req.query.weighted === "true",
    });
    const agentic = getAgenticDetector().detect(req.body);
    const selector = getModelTierSelector();
    const tier = selector.getTier(analysis.score);

    // Get recommended model for tier
    const provider = req.query.provider || "openai";
    const modelSelection = selector.selectModel(tier, provider);

    // Get model cost info
    let modelInfo = null;
    if (modelSelection.model) {
      const registry = await getModelRegistry();
      modelInfo = registry.getCost(modelSelection.model);
    }

    res.json({
      status: "ok",
      analysis,
      agentic,
      tier,
      modelSelection,
      modelInfo,
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

router.get("/debug/session", (req, res) => {
  if (!req.sessionId) {
    return res
      .status(400)
      .json({ error: "missing_session_id", message: "Provide x-session-id header" });
  }
  const session = getSession(req.sessionId);
  if (!session) {
    return res.status(404).json({ error: "session_not_found", message: "Session not found" });
  }
  res.json({ session });
});

router.post("/v1/messages/count_tokens", rateLimiter, async (req, res, next) => {
  try {
    const { messages, system } = req.body;

    // Validate required fields. An empty array is accepted, so the message
    // only promises what is actually enforced.
    if (!messages || !Array.isArray(messages)) {
      return res.status(400).json({
        error: {
          type: "invalid_request_error",
          message: "messages must be an array",
        },
      });
    }

    // Estimate token count
    const inputTokens = estimateTokenCount(messages, system);

    // Return token count in Anthropic API format
    res.json({
      input_tokens: inputTokens,
    });
  } catch (error) {
    next(error);
  }
});

// Stub endpoint for event logging (used by Claude CLI)
router.post("/api/event_logging/batch", (req, res) => {
  // Silently accept and discard event logging requests
  res.status(200).json({ success: true });
});

// ---------------------------------------------------------------------------
// Anthropic-compatible messages endpoint
// ---------------------------------------------------------------------------

router.post("/v1/messages", rateLimiter, async (req, res, next) => {
  try {
    const { createTimer } = require("../utils/perf-timer");
    const timer = createTimer("POST /v1/messages");
    metrics.recordRequest();

    // Convert Anthropic server tools to regular function tools.
    // The orchestrator's SERVER_SIDE_TOOLS handling will execute them server-side.
    if (Array.isArray(req.body?.tools)) {
      const incomingToolTypes = req.body.tools
        .map((tool) => tool?.type || tool?.name)
        .filter(Boolean);
      logger.info({ incomingToolTypes }, "Incoming /v1/messages tool types");
      req.body.tools = req.body.tools.map((tool) => convertServerTool(tool));
    }

    // Support both query parameter (?stream=true) and body parameter ({"stream": true})
    const wantsStream = Boolean(req.query?.stream === "true" || req.body?.stream);
    const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0;
    timer.mark("parseRequest");

    // Analyze complexity for routing headers (Phase 3)
    const complexity = await analyzeComplexity(req.body);
    timer.mark("analyzeComplexity");

    // Pre-route decision: best-effort, for header surfacing only. The actual
    // final tier may differ (invokeModel re-runs determineProviderSmart).
    const preRouteDecision = buildPreRouteDecision(req.body, complexity);
    const routingHeaders = getRoutingHeaders(preRouteDecision);

    // Build the interaction block once. It travels in headers always
    // (X-Lynkr-Interaction-* derived fields) and optionally into the
    // response body when LYNKR_VISIBLE_ROUTING=true.
    const interaction = buildInteractionBlock(preRouteDecision);

    // Extract client CWD from request body or header
    const clientCwd = validateCwd(req.body?.cwd || req.headers["x-workspace-cwd"]);

    const processArgs = {
      payload: req.body,
      headers: req.headers,
      session: req.session,
      cwd: clientCwd,
      options: {
        maxSteps: req.body?.max_steps,
        maxDurationMs: req.body?.max_duration_ms,
        tenantPolicy: res.locals?.tenantPolicy || null,
      },
    };

    const sseHeaders = {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
      ...routingHeaders, // Include routing headers
    };

    // For true streaming: only support non-tool requests for MVP.
    // Tool requests require buffering for agent loop.
    if (wantsStream && !hasTools) {
      metrics.recordStreamingStart();
      res.set(sseHeaders);
      if (typeof res.flushHeaders === "function") {
        res.flushHeaders();
      }

      const result = await processMessage(processArgs);

      // Provider returned a live stream: forward it to the client.
      if (result?.stream) {
        await forwardProviderStream(res, result.stream);
        return;
      }

      // Fallback: no stream, wrap the buffered response in Anthropic SSE format.
      if (!result || !result.body) {
        endWithEmptyResponseError(res);
        return;
      }

      // The request had no tools, so any tool_use block is a hallucinated call.
      writeMessageAsSse(res, result.body, { forwardToolUse: false });
      metrics.recordResponse(result.status);
      res.end();
      return;
    }

    // Non-streaming or tool-based requests (buffered path)
    timer.mark("preProcessMessage");
    const result = await processMessage(processArgs);
    timer.mark("processMessage");
    timer.done();

    // Legacy streaming wrapper (for tool-based requests that requested streaming)
    if (wantsStream && hasTools) {
      metrics.recordStreamingStart();
      res.set(sseHeaders);
      if (typeof res.flushHeaders === "function") {
        res.flushHeaders();
      }

      if (!result || !result.body) {
        endWithEmptyResponseError(res);
        return;
      }

      writeMessageAsSse(res, result.body, { forwardToolUse: true });
      metrics.recordResponse(result.status);
      res.end();
      return;
    }

    // Add routing headers (Phase 3), then any headers returned with the result.
    applyHeaders(res, routingHeaders);
    if (result.headers) {
      applyHeaders(res, result.headers);
    }

    // Inject visible interaction block into the response body when
    // LYNKR_VISIBLE_ROUTING=true. Only successful responses that look like a
    // valid Anthropic Message are touched; everything else passes through.
    let finalBody = result.body;
    if (
      config.routing?.visibleInteraction &&
      interaction &&
      result.status >= 200 &&
      result.status < 300 &&
      result.body
    ) {
      finalBody = injectInteraction(result.body, interaction);
    }

    metrics.recordResponse(result.status);
    res.status(result.status).send(finalBody);
  } catch (error) {
    next(error);
  }
});

// ---------------------------------------------------------------------------
// Agent routes
// ---------------------------------------------------------------------------

// List available agents (must come before parameterized routes)
router.get("/v1/agents", (req, res) => {
  try {
    const { listAgents } = require("../agents");
    const agents = listAgents();
    res.json({ agents });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// Agent stats endpoint (specific path before parameterized)
router.get("/v1/agents/stats", (req, res) => {
  try {
    const { getAgentStats } = require("../agents");
    const stats = getAgentStats();
    res.json({ stats });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// Read agent transcript (specific path with param before catch-all)
router.get("/v1/agents/:agentId/transcript", (req, res) => {
  try {
    const ContextManager = require("../agents/context-manager");
    const cm = new ContextManager();
    const transcript = cm.readTranscript(req.params.agentId);

    if (!transcript) {
      return res.status(404).json({ error: "Transcript not found" });
    }

    res.json({ transcript });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// Agent execution details (parameterized - must come last)
router.get("/v1/agents/:executionId", (req, res) => {
  try {
    const { getAgentExecution } = require("../agents");
    const details = getAgentExecution(req.params.executionId);

    if (!details) {
      return res.status(404).json({ error: "Execution not found" });
    }

    res.json(details);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// ---------------------------------------------------------------------------
// Token usage routes
// ---------------------------------------------------------------------------

// Token usage statistics for a session
router.get("/api/sessions/:sessionId/tokens", (req, res) => {
  try {
    const tokens = require("../utils/tokens");
    const { sessionId } = req.params;

    const session = getSession(sessionId);
    if (!session) {
      return res.status(404).json({ error: "Session not found" });
    }

    const stats = tokens.getSessionTokenStats(session);

    res.json({
      sessionId,
      stats: {
        turns: stats.turns,
        totalTokens: stats.totalTokens,
        totalCost: parseFloat(stats.totalCost.toFixed(4)),
        averageTokensPerTurn: stats.averageTokensPerTurn,
        cacheHitRate: parseFloat(stats.cacheHitRate) + "%",
      },
      breakdown: stats.breakdown.map((turn) => ({
        turn: turn.turn,
        timestamp: turn.timestamp,
        model: turn.model,
        estimated: turn.estimated.total,
        actual: {
          input: turn.actual.inputTokens,
          output: turn.actual.outputTokens,
          cached: turn.actual.cacheReadTokens,
          total: turn.actual.totalTokens,
        },
        cost: parseFloat(turn.cost.total.toFixed(6)),
      })),
    });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// Global token usage statistics (all sessions)
router.get("/api/tokens/stats", (req, res) => {
  try {
    const tokens = require("../utils/tokens");
    const { getAllSessions } = require("../sessions");

    const allSessions = getAllSessions();

    let totalTokens = 0;
    let totalCost = 0;
    let totalTurns = 0;
    let totalSessions = 0;

    // Only sessions with at least one turn contribute to the totals.
    for (const session of allSessions) {
      const stats = tokens.getSessionTokenStats(session);
      if (stats.turns > 0) {
        totalTokens += stats.totalTokens;
        totalCost += stats.totalCost;
        totalTurns += stats.turns;
        totalSessions++;
      }
    }

    res.json({
      global: {
        sessions: totalSessions,
        turns: totalTurns,
        totalTokens,
        totalCost: parseFloat(totalCost.toFixed(4)),
        averageTokensPerTurn: totalTurns > 0 ? Math.round(totalTokens / totalTurns) : 0,
        averageTokensPerSession: totalSessions > 0 ? Math.round(totalTokens / totalSessions) : 0,
      },
    });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// Mount OpenAI-compatible endpoints for Cursor IDE support
router.use("/v1", openaiRouter);

// Mount Anthropic-compatible provider discovery endpoints (cc-relay style)
// These provide /v1/models and /v1/providers for Claude Code CLI compatibility
router.use("/v1", providersRouter);

// ---------------------------------------------------------------------------
// Headroom compression endpoints
// ---------------------------------------------------------------------------

router.get("/metrics/compression", async (req, res) => {
  try {
    const { getCombinedMetrics } = require("../headroom");
    // Named distinctly so it does not shadow the module-level `metrics`.
    const compressionMetrics = await getCombinedMetrics();
    res.json(compressionMetrics);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

router.get("/metrics/tool-compression", (req, res) => {
  const { getMetrics } = require("../context/tool-result-compressor");
  res.json(getMetrics());
});

router.get("/tee/:id", (req, res) => {
  const { teeGet } = require("../context/tool-result-compressor");
  const content = teeGet(req.params.id);
  if (!content) {
    return res.status(404).json({ error: "Tee entry not found or expired" });
  }
  res.type("text/plain").send(content);
});

router.get("/health/headroom", async (req, res) => {
  try {
    const { getHeadroomManager } = require("../headroom");
    const manager = getHeadroomManager();
    const health = await manager.getHealth();
    res.status(health.healthy ? 200 : 503).json(health);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

router.get("/headroom/status", async (req, res) => {
  try {
    const { getHeadroomManager } = require("../headroom");
    const manager = getHeadroomManager();
    const status = await manager.getDetailedStatus();
    res.json(status);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

router.post("/headroom/restart", async (req, res) => {
  try {
    const { getHeadroomManager } = require("../headroom");
    const manager = getHeadroomManager();
    const result = await manager.restart();
    res.json({ success: true, ...result });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

router.get("/headroom/logs", async (req, res) => {
  try {
    const { getHeadroomManager } = require("../headroom");
    const manager = getHeadroomManager();
    // A `tail` value that does not parse falls back to the default rather
    // than passing NaN to the manager.
    const tail = parseIntOr(req.query.tail, DEFAULT_LOG_TAIL);
    const logs = await manager.getLogs(tail);
    // A null result is reported to the client as Docker management being disabled.
    if (logs === null) {
      return res.status(400).json({ error: "Docker management is disabled" });
    }
    res.type("text/plain").send(logs);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

module.exports = router;