lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
1,007 lines (901 loc) • 35.5 kB
JavaScript
const express = require("express");
const config = require("../config");
const { processMessage } = require("../orchestrator");
const { getSession } = require("../sessions");
const metrics = require("../metrics");
const logger = require("../logger");
const { createRateLimiter } = require("./middleware/rate-limiter");
const openaiRouter = require("./openai-router");
const providersRouter = require("./providers-handler");
const { getRoutingHeaders, getRoutingStats, analyzeComplexity, getModelTierSelector, analyzeRisk } = require("../routing");
const { buildInteractionBlock } = require("../routing/interaction");
const { validateCwd } = require("../workspace");
const { renderText } = require("../utils/markdown-ansi");
// Router instance that every endpoint in this file is registered on; it is the module's export.
const router = express.Router();
// Create rate limiter middleware (attached per-route to the POST /v1/messages and
// POST /v1/messages/count_tokens handlers in this file).
const rateLimiter = createRateLimiter();
const { countMessagesTokens } = require("../routing/tokenizer");

/**
 * Estimate how many input tokens a request will consume.
 *
 * Thin wrapper that delegates to `countMessagesTokens` from routing/tokenizer
 * (Phase 1.1: described there as tiktoken-backed, with a graceful chars/4
 * fallback when js-tiktoken is unavailable).
 *
 * @param {Array} [messages=[]] - Conversation messages to count.
 * @param {*} [system=null] - Optional system prompt, passed through untouched.
 * @param {*} [model=null] - Optional model identifier, passed through untouched.
 * @returns {*} Whatever `countMessagesTokens` returns for these arguments.
 */
function estimateTokenCount(messages = [], system = null, model = null) {
  const tokenTotal = countMessagesTokens(messages, system, model);
  return tokenTotal;
}
// Root route - Claude Code health check.
// HEAD / replies with a bare 200 and no body.
router.head("/", (_req, res) => {
  const reply = res.status(200);
  reply.end();
});
// GET / — identifies the service and reports the version read from package.json.
router.get("/", (_req, res) => {
  const { version } = require("../../package.json");
  const serviceInfo = { service: "Lynkr", version, status: "running" };
  res.json(serviceInfo);
});
// GET /health — unconditional liveness reply.
router.get("/health", (_req, res) => {
  const liveness = { status: "ok" };
  res.json(liveness);
});
// Usage report — same data as `lynkr usage` CLI, served as JSON for
// dashboards / agents / scripts that want to surface spend & savings.
//
// Query parameters:
//   window   - explicit window string; wins over `days` when present.
//   days     - number of days, turned into "<n>d".
//   flagship, provider, model - forwarded unchanged to the aggregator.
//
// Responds with the aggregator's result, or 500 { error } if it throws.
router.get("/v1/usage", (req, res) => {
  try {
    const aggregator = require("../usage/aggregator");
    const { days, flagship, provider, model } = req.query;

    let usageWindow = req.query.window;
    if (!usageWindow) {
      // A non-numeric `days` used to produce the window "NaNd"; treat it the
      // same as a missing value and fall back to the 30-day default.
      const parsedDays = days ? Number.parseInt(days, 10) : Number.NaN;
      usageWindow = Number.isNaN(parsedDays) ? "30d" : `${parsedDays}d`;
    }

    const usage = aggregator.getUsage({
      window: usageWindow,
      flagship,
      provider,
      model,
    });
    res.json(usage);
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
// Routing stats endpoint (Phase 3: Metrics)
// Returns the recorded routing stats, or a placeholder message when none exist.
router.get("/routing/stats", (_req, res) => {
  const recorded = getRoutingStats();
  const stats = recorded ? recorded : { message: "No routing decisions recorded yet" };
  res.json({ status: "ok", stats });
});
// Model registry info (from LiteLLM + models.dev APIs)
// Responds with { status: "ok" } merged with the registry's stats; 500 on failure.
router.get("/routing/models", async (_req, res) => {
  try {
    const { getModelRegistry } = require("../routing/model-registry");
    const modelRegistry = await getModelRegistry();
    const registryStats = modelRegistry.getStats();
    res.json({ status: "ok", ...registryStats });
  } catch (registryError) {
    const { message } = registryError;
    res.status(500).json({ error: message });
  }
});
// Get specific model info
// 404 when the registry has no entry or only its "default" placeholder entry.
router.get("/routing/models/:model", async (req, res) => {
  try {
    const { getModelRegistry } = require("../routing/model-registry");
    const modelRegistry = await getModelRegistry();
    const requestedName = req.params.model;
    const entry = modelRegistry.getModel(requestedName);
    const isKnownModel = Boolean(entry) && entry.source !== "default";
    if (!isKnownModel) {
      return res.status(404).json({ error: "Model not found", model: req.params.model });
    }
    // Entry fields are spread last, so they override `status` / `model` on key clashes.
    res.json({ status: "ok", model: req.params.model, ...entry });
  } catch (lookupError) {
    res.status(500).json({ error: lookupError.message });
  }
});
// Routing tier information
// Responds with { status: "ok" } merged with the tier selector's stats; 500 on failure.
router.get("/routing/tiers", (_req, res) => {
  try {
    // Resolved locally from model-tiers; shadows the file-level import of the same name.
    const { getModelTierSelector } = require("../routing/model-tiers");
    const tierSelector = getModelTierSelector();
    const tierStats = tierSelector.getTierStats();
    res.json({ status: "ok", ...tierStats });
  } catch (tierError) {
    res.status(500).json({ error: tierError.message });
  }
});
// Cost optimization stats
// Responds with { status: "ok" } merged with the cost optimizer's stats; 500 on failure.
router.get("/metrics/cost-optimization", (_req, res) => {
  try {
    const { getCostOptimizer } = require("../routing/cost-optimizer");
    const costOptimizer = getCostOptimizer();
    const optimizerStats = costOptimizer.getStats();
    res.json({ status: "ok", ...optimizerStats });
  } catch (optimizerError) {
    res.status(500).json({ error: optimizerError.message });
  }
});
// Request analysis test endpoint
// Runs complexity analysis and agentic detection on the posted body, maps the
// score to a tier, and reports the model the selector would pick for
// ?provider= (default "openai") along with its cost info when a model is chosen.
router.post("/routing/analyze", async (req, res) => {
  try {
    const { getAgenticDetector } = require("../routing/agentic-detector");
    const { getModelTierSelector } = require("../routing/model-tiers");
    const { getModelRegistry } = require("../routing/model-registry");

    const useWeighted = req.query.weighted === "true";
    const analysis = await analyzeComplexity(req.body, { weighted: useWeighted });
    const agentic = getAgenticDetector().detect(req.body);

    const tierSelector = getModelTierSelector();
    const tier = tierSelector.getTier(analysis.score);

    // Recommended model for the tier
    const providerName = req.query.provider || "openai";
    const modelSelection = tierSelector.selectModel(tier, providerName);

    // Cost info is only looked up when the selector actually named a model
    const modelInfo = modelSelection.model
      ? (await getModelRegistry()).getCost(modelSelection.model)
      : null;

    res.json({ status: "ok", analysis, agentic, tier, modelSelection, modelInfo });
  } catch (analyzeError) {
    res.status(500).json({ error: analyzeError.message });
  }
});
// GET /debug/session — dumps the session identified by req.sessionId.
// 400 when the request carries no session id, 404 when no session matches it.
router.get("/debug/session", (req, res) => {
  const { sessionId } = req;
  if (!sessionId) {
    res.status(400).json({ error: "missing_session_id", message: "Provide x-session-id header" });
    return;
  }

  const session = getSession(sessionId);
  if (session) {
    res.json({ session });
    return;
  }

  res.status(404).json({ error: "session_not_found", message: "Session not found" });
});
// POST /v1/messages/count_tokens — rate-limited token estimate for a request.
//
// Body: { messages: Array, system?: any }
// Replies { input_tokens } on success, or a 400 invalid_request_error when
// `messages` is not an array. Unexpected failures are forwarded via next(error).
router.post("/v1/messages/count_tokens", rateLimiter, async (req, res, next) => {
  try {
    // A request without a parsed body used to throw while destructuring and
    // end up in the error middleware; treat it as a validation failure instead.
    const { messages, system } = req.body ?? {};

    // Validate required fields. Empty arrays are accepted, so the error text
    // no longer claims the array must be non-empty.
    if (!Array.isArray(messages)) {
      return res.status(400).json({
        error: {
          type: "invalid_request_error",
          message: "messages must be an array",
        },
      });
    }

    // Estimate token count
    const inputTokens = estimateTokenCount(messages, system);

    // Return token count in Anthropic API format
    res.json({
      input_tokens: inputTokens,
    });
  } catch (error) {
    next(error);
  }
});
// Stub endpoint for event logging (used by Claude CLI)
router.post("/api/event_logging/batch", (_req, res) => {
  // The batch is acknowledged and its contents are ignored.
  const acknowledgement = { success: true };
  res.status(200).json(acknowledgement);
});
/**
* POST /v1/messages — main message endpoint, registered behind the rate limiter.
*
* Flow, as implemented below:
*  1. Rewrites `web_search_20*` / `web_fetch_*` entries of req.body.tools into plain
*     function-tool definitions (name, description, input_schema).
*  2. Runs analyzeComplexity / analyzeRisk and builds a best-effort pre-route decision,
*     used for the routing headers and the interaction block.
*  3. Responds in one of three ways:
*     - stream requested, no tools: SSE response; forwards result.stream when present,
*       otherwise replays result.body as message_start / content_block_* / message_delta /
*       message_stop events.
*     - stream requested, with tools: processMessage is awaited first, then result.body is
*       replayed as SSE events.
*     - otherwise: sends result.body with result.status, after setting the routing headers
*       and any result.headers.
*
* Any exception that reaches the outer catch is forwarded with next(error).
*/
router.post("/v1/messages", rateLimiter, async (req, res, next) => {
try {
const { createTimer } = require("../utils/perf-timer");
const timer = createTimer("POST /v1/messages");
metrics.recordRequest();
// Convert Anthropic server tools (web_search_20260209, etc.) to regular
// function tools so non-Anthropic providers can execute them via Lynkr.
// The orchestrator's SERVER_SIDE_TOOLS handling will execute them server-side.
// Note: this replaces req.body.tools in place, so everything downstream sees the rewritten list.
if (Array.isArray(req.body?.tools)) {
const incomingToolTypes = req.body.tools.map(t => t?.type || t?.name).filter(Boolean);
logger.info({ incomingToolTypes }, "Incoming /v1/messages tool types");
req.body.tools = req.body.tools.map((tool) => {
if (tool?.type?.startsWith?.("web_search_20")) {
logger.info({ originalType: tool.type, name: tool.name }, "Converting web_search server tool to function tool");
return {
name: tool.name || "web_search",
description: "Search the web for up-to-date information. Returns relevant search results from the web.",
input_schema: {
type: "object",
properties: {
query: { type: "string", description: "Search query" },
},
required: ["query"],
},
};
}
if (tool?.type?.startsWith?.("web_fetch_")) {
return {
name: tool.name || "web_fetch",
description: "Fetch the contents of a URL.",
input_schema: {
type: "object",
properties: {
url: { type: "string", description: "URL to fetch" },
},
required: ["url"],
},
};
}
return tool;
});
}
// Support both query parameter (?stream=true) and body parameter ({"stream": true})
// The body flag is coerced with Boolean(), so any truthy value enables streaming.
const wantsStream = Boolean(req.query?.stream === 'true' || req.body?.stream);
const hasTools = Array.isArray(req.body?.tools) && req.body.tools.length > 0;
timer.mark("parseRequest");
// Analyze complexity for routing headers (Phase 3)
const complexity = await analyzeComplexity(req.body);
timer.mark("analyzeComplexity");
// Risk axis runs alongside complexity. Cheap pure-string scan, no I/O.
// A failure here is logged at debug level and leaves preRouteRisk as null.
let preRouteRisk = null;
try {
preRouteRisk = analyzeRisk(req.body);
} catch (err) {
logger.debug({ err: err.message }, '[Router] Risk analysis failed in pre-route');
}
// Pre-route tier: high-risk forces COMPLEX, otherwise tier is
// inferred from the complexity recommendation. The actual final
// tier may differ (invokeModel re-runs determineProviderSmart) —
// this is best-effort for header surfacing.
let preRouteProvider = 'cloud';
let preRouteTier = null;
let preRouteModel = null;
let preRouteMethod = 'complexity';
let preRouteReason = complexity.breakdown?.taskType?.reason || complexity.recommendation;
if (preRouteRisk?.level === 'high') {
try {
const selector = getModelTierSelector();
const tierResult = selector.selectModel('COMPLEX', null);
preRouteProvider = tierResult.provider;
preRouteTier = 'COMPLEX';
preRouteModel = tierResult.model;
preRouteMethod = 'risk';
preRouteReason = 'high_risk_forced_tier';
} catch (_) {
// Risk-forced tier not configured; fall back to normal flow.
}
}
// Only reached without a tier when the risk override did not apply (or failed).
// A 'local' recommendation maps to the SIMPLE tier; if that lookup throws,
// the provider falls back to 'ollama' and tier/model stay null.
if (!preRouteTier) {
if (complexity.recommendation === 'local') {
try {
const selector = getModelTierSelector();
const tierResult = selector.selectModel('SIMPLE', null);
preRouteProvider = tierResult.provider;
preRouteTier = 'SIMPLE';
preRouteModel = tierResult.model;
} catch (_) {
preRouteProvider = 'ollama';
}
}
}
const preRouteDecision = {
provider: preRouteProvider,
tier: preRouteTier,
model: preRouteModel,
method: preRouteMethod,
reason: preRouteReason,
score: complexity.score,
threshold: complexity.threshold,
risk: preRouteRisk,
};
const routingHeaders = getRoutingHeaders(preRouteDecision);
// Build the interaction block once. It travels in headers always
// (X-Lynkr-Interaction-* derived fields) and optionally into the
// response body when LYNKR_VISIBLE_ROUTING=true.
const interaction = buildInteractionBlock(preRouteDecision);
// Extract client CWD from request body or header
const clientCwd = validateCwd(req.body?.cwd || req.headers['x-workspace-cwd']);
// For true streaming: only support non-tool requests for MVP
// Tool requests require buffering for agent loop
if (wantsStream && !hasTools) {
// True streaming path for text-only requests
metrics.recordStreamingStart();
res.set({
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
Connection: "keep-alive",
...routingHeaders, // Include routing headers
});
if (typeof res.flushHeaders === "function") {
res.flushHeaders();
}
// NOTE(review): headers may already be flushed at this point. If processMessage rejects,
// the outer catch calls next(error) after the response has started — confirm the error
// middleware copes with headersSent.
const result = await processMessage({
payload: req.body,
headers: req.headers,
session: req.session,
cwd: clientCwd,
options: {
maxSteps: req.body?.max_steps,
maxDurationMs: req.body?.max_duration_ms,
tenantPolicy: res.locals?.tenantPolicy || null,
},
});
// Check if we got a stream back
if (result.stream) {
// Parse SSE stream from provider and forward to client
const reader = result.stream.getReader();
const decoder = new TextDecoder();
const bufferChunks = []; // Use array to avoid string concatenation overhead
try {
while (true) {
const { done, value } = await reader.read();
if (done) break;
const chunk = decoder.decode(value, { stream: true });
bufferChunks.push(chunk);
// Join buffer and split by lines
const buffer = bufferChunks.join('');
const lines = buffer.split('\n');
// Keep last incomplete line in buffer chunks
const remaining = lines.pop() || '';
bufferChunks.length = 0;
if (remaining) bufferChunks.push(remaining);
// NOTE(review): whitespace-only lines are not forwarded. That includes the blank
// lines that separate SSE events in the upstream stream — confirm clients tolerate it.
for (const line of lines) {
if (line.trim()) {
res.write(line + '\n');
}
}
// Flush after each chunk
if (typeof res.flush === 'function') {
res.flush();
}
}
// Send any remaining buffer
const remaining = bufferChunks.join('');
if (remaining.trim()) {
res.write(remaining + '\n');
}
// A fully forwarded stream is always recorded as a 200, independent of result.status.
metrics.recordResponse(200);
res.end();
return;
} catch (streamError) {
logger.error({ error: streamError }, "Error streaming response");
// Cancel stream on error
try {
await reader.cancel();
} catch (cancelError) {
logger.debug({ error: cancelError }, "Failed to cancel stream");
}
if (!res.headersSent) {
res.status(500).json({ error: "Streaming error" });
} else {
res.end();
}
return;
} finally {
// CRITICAL: Always release lock
try {
reader.releaseLock();
} catch (releaseError) {
// Lock may already be released, ignore
logger.debug({ error: releaseError }, "Stream lock already released");
}
}
}
// Fallback: if no stream, wrap buffered response in proper Anthropic SSE format
// Check if result.body exists
// (this early exit ends the response without calling metrics.recordResponse)
if (!result || !result.body) {
res.write(`event: error\n`);
res.write(`data: ${JSON.stringify({ type: "error", error: { message: "Empty response from provider" } })}\n\n`);
res.end();
return;
}
const msg = result.body;
// 1. message_start
res.write(`event: message_start\n`);
res.write(`data: ${JSON.stringify({
type: "message_start",
message: {
id: msg.id,
type: "message",
role: "assistant",
content: [],
model: msg.model,
stop_reason: null,
stop_sequence: null,
usage: { input_tokens: msg.usage?.input_tokens || 0, output_tokens: 1 }
}
})}\n\n`);
// 2. content_block_start and content_block_delta for each content block
// Filter out server-side tools that shouldn't reach the client
const _serverTools = new Set(["task", "websearch", "webfetch", "web_search", "web_fetch", "web_agent"]);
const contentBlocks = (msg.content || []).filter(b =>
!(b.type === "tool_use" && _serverTools.has((b.name || "").toLowerCase()))
);
// The loop index i (position in the filtered list) is used as the SSE block index.
for (let i = 0; i < contentBlocks.length; i++) {
const block = contentBlocks[i];
if (block.type === "text") {
res.write(`event: content_block_start\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_start",
index: i,
content_block: { type: "text", text: "" }
})}\n\n`);
// Send text — one chunk when ANSI rendering is active (splitting
// ANSI escape sequences across 20-char chunks breaks terminal output).
// Plain text falls back to line-level chunks for a trickle effect.
// Never apply ANSI rendering to HTML content (<artifact> blocks):
// ANSI codes corrupt CSS selectors like `*` and break the browser viewer.
const rawBlockText = block.text || "";
// Heuristic: text counts as HTML if it mentions "<artifact" or its first non-space char is "<".
const isHtmlContent = rawBlockText.includes("<artifact") || rawBlockText.trimStart().startsWith("<");
const text = isHtmlContent ? rawBlockText : renderText(rawBlockText);
const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
if (ansiEnabled && !isHtmlContent) {
if (text.length > 0) {
res.write(`event: content_block_delta\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_delta",
index: i,
delta: { type: "text_delta", text }
})}\n\n`);
}
} else {
// Every line gets "\n" re-appended, including the last one, so the replayed text
// ends with a newline even when the source text did not.
const lines = text.split("\n");
for (const line of lines) {
const lineWithNl = line + "\n";
res.write(`event: content_block_delta\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_delta",
index: i,
delta: { type: "text_delta", text: lineWithNl }
})}\n\n`);
}
}
res.write(`event: content_block_stop\n`);
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
} else if (block.type === "thinking") {
res.write(`event: content_block_start\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_start",
index: i,
content_block: { type: "thinking", thinking: "" }
})}\n\n`);
// Thinking text is replayed in fixed 40-character slices.
const thinkingText = block.thinking || "";
const thinkChunkSize = 40;
for (let j = 0; j < thinkingText.length; j += thinkChunkSize) {
res.write(`event: content_block_delta\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_delta",
index: i,
delta: { type: "thinking_delta", thinking: thinkingText.slice(j, j + thinkChunkSize) }
})}\n\n`);
}
res.write(`event: content_block_stop\n`);
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
} else if (block.type === "tool_use") {
// Original request had no tools → model hallucinated a tool call.
// Extract file content from write-style tools and wrap it in an
// <artifact> block so open-design routes it to the Design panel.
const toolName = (block.name || "").toLowerCase();
const writeTools = new Set(["write", "create_file", "write_file", "str_replace_editor"]);
if (writeTools.has(toolName)) {
const rawContent = block.input?.content ?? block.input?.file_content ?? block.input?.new_content ?? "";
const filePath = String(block.input?.file_path ?? block.input?.filename ?? "design.html");
const content = String(rawContent);
if (content) {
// Wrap in <artifact> so open-design's parser routes it to the file viewer.
// NOTE(review): `title` is the raw file path and is interpolated into the attribute
// without escaping, while `identifier` is sanitized — confirm a path containing `"` is acceptable.
const identifier = filePath.replace(/[^a-zA-Z0-9._-]/g, "_");
const title = filePath;
const wrapped = `<artifact identifier="${identifier}" type="text/html" title="${title}">\n${content}\n</artifact>`;
res.write(`event: content_block_start\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_start",
index: i,
content_block: { type: "text", text: "" }
})}\n\n`);
res.write(`event: content_block_delta\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_delta",
index: i,
delta: { type: "text_delta", text: wrapped }
})}\n\n`);
res.write(`event: content_block_stop\n`);
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
}
}
// Non-write tool_use in a tool-less request is silently dropped.
// NOTE(review): a dropped block still consumes its index i, so the emitted block
// indices can have gaps — confirm clients accept non-contiguous indices.
}
}
// 3. message_delta with stop_reason
res.write(`event: message_delta\n`);
res.write(`data: ${JSON.stringify({
type: "message_delta",
delta: { stop_reason: msg.stop_reason || "end_turn", stop_sequence: null },
usage: { output_tokens: msg.usage?.output_tokens || 0 }
})}\n\n`);
// 4. message_stop
res.write(`event: message_stop\n`);
res.write(`data: ${JSON.stringify({ type: "message_stop" })}\n\n`);
metrics.recordResponse(result.status);
res.end();
return;
}
// Non-streaming or tool-based requests (buffered path)
timer.mark("preProcessMessage");
const result = await processMessage({
payload: req.body,
headers: req.headers,
session: req.session,
cwd: clientCwd,
options: {
maxSteps: req.body?.max_steps,
maxDurationMs: req.body?.max_duration_ms,
tenantPolicy: res.locals?.tenantPolicy || null,
},
});
timer.mark("processMessage");
timer.done();
// Legacy streaming wrapper (for tool-based requests that requested streaming)
if (wantsStream && hasTools) {
metrics.recordStreamingStart();
// NOTE(review): unlike the text-only streaming path above, routingHeaders are not
// included in this res.set call — confirm whether that is intentional.
res.set({
"Content-Type": "text/event-stream",
"Cache-Control": "no-cache",
Connection: "keep-alive",
});
if (typeof res.flushHeaders === "function") {
res.flushHeaders();
}
// Check if result.body exists
if (!result || !result.body) {
res.write(`event: error\n`);
res.write(`data: ${JSON.stringify({ type: "error", error: { message: "Empty response from provider" } })}\n\n`);
res.end();
return;
}
// Use proper Anthropic SSE format
const msg = result.body;
// 1. message_start
res.write(`event: message_start\n`);
res.write(`data: ${JSON.stringify({
type: "message_start",
message: {
id: msg.id,
type: "message",
role: "assistant",
content: [],
model: msg.model,
stop_reason: null,
stop_sequence: null,
usage: { input_tokens: msg.usage?.input_tokens || 0, output_tokens: 1 }
}
})}\n\n`);
// 2. content_block_start and content_block_delta for each content block
// Filter out server-side tools that shouldn't reach the client
const _serverTools = new Set(["task", "websearch", "webfetch", "web_search", "web_fetch", "web_agent"]);
const contentBlocks = (msg.content || []).filter(b =>
!(b.type === "tool_use" && _serverTools.has((b.name || "").toLowerCase()))
);
for (let i = 0; i < contentBlocks.length; i++) {
const block = contentBlocks[i];
if (block.type === "text") {
res.write(`event: content_block_start\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_start",
index: i,
content_block: { type: "text", text: "" }
})}\n\n`);
// Same HTML heuristic and ANSI/line-chunk split as the text-only streaming path above.
const rawBlockText2 = block.text || "";
const isHtmlContent2 = rawBlockText2.includes("<artifact") || rawBlockText2.trimStart().startsWith("<");
const text = isHtmlContent2 ? rawBlockText2 : renderText(rawBlockText2);
const { enabled: ansiEnabled } = require("../utils/markdown-ansi");
if (ansiEnabled && !isHtmlContent2) {
if (text.length > 0) {
res.write(`event: content_block_delta\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_delta",
index: i,
delta: { type: "text_delta", text }
})}\n\n`);
}
} else {
const lines = text.split("\n");
for (const line of lines) {
const lineWithNl = line + "\n";
res.write(`event: content_block_delta\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_delta",
index: i,
delta: { type: "text_delta", text: lineWithNl }
})}\n\n`);
}
}
res.write(`event: content_block_stop\n`);
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
} else if (block.type === "thinking") {
res.write(`event: content_block_start\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_start",
index: i,
content_block: { type: "thinking", thinking: "" }
})}\n\n`);
const thinkingText = block.thinking || "";
const thinkChunkSize = 40;
for (let j = 0; j < thinkingText.length; j += thinkChunkSize) {
res.write(`event: content_block_delta\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_delta",
index: i,
delta: { type: "thinking_delta", thinking: thinkingText.slice(j, j + thinkChunkSize) }
})}\n\n`);
}
res.write(`event: content_block_stop\n`);
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
} else if (block.type === "tool_use") {
// Tool calls are passed through here: start with an empty input, then send the
// whole serialized input as a single input_json_delta.
res.write(`event: content_block_start\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_start",
index: i,
content_block: { type: "tool_use", id: block.id, name: block.name, input: {} }
})}\n\n`);
res.write(`event: content_block_delta\n`);
res.write(`data: ${JSON.stringify({
type: "content_block_delta",
index: i,
delta: { type: "input_json_delta", partial_json: JSON.stringify(block.input) }
})}\n\n`);
res.write(`event: content_block_stop\n`);
res.write(`data: ${JSON.stringify({ type: "content_block_stop", index: i })}\n\n`);
}
}
// 3. message_delta with stop_reason
res.write(`event: message_delta\n`);
res.write(`data: ${JSON.stringify({
type: "message_delta",
delta: { stop_reason: msg.stop_reason || "end_turn", stop_sequence: null },
usage: { output_tokens: msg.usage?.output_tokens || 0 }
})}\n\n`);
// 4. message_stop
res.write(`event: message_stop\n`);
res.write(`data: ${JSON.stringify({ type: "message_stop" })}\n\n`);
metrics.recordResponse(result.status);
res.end();
return;
}
// Add routing headers (Phase 3)
Object.entries(routingHeaders).forEach(([key, value]) => {
if (value !== undefined) {
res.setHeader(key, value);
}
});
// Headers from the result are applied second, so they overwrite routing headers of the same name.
if (result.headers) {
Object.entries(result.headers).forEach(([key, value]) => {
if (value !== undefined) {
res.setHeader(key, value);
}
});
}
// Inject visible interaction block into the response body when
// LYNKR_VISIBLE_ROUTING=true. We only mutate JSON bodies — and only
// when the response looks like a valid Anthropic Message — so this
// is a no-op for streamed / error / non-message responses.
let finalBody = result.body;
if (
config.routing?.visibleInteraction &&
interaction &&
result.status >= 200 && result.status < 300 &&
result.body
) {
try {
// Only string or Buffer bodies are considered; a body that is already an object
// fails the typeof check below and is sent unchanged.
const text = Buffer.isBuffer(result.body) ? result.body.toString('utf8') : result.body;
if (typeof text === 'string' && text.startsWith('{')) {
const parsed = JSON.parse(text);
if (parsed && typeof parsed === 'object' && parsed.type === 'message') {
parsed.lynkr_interaction = interaction;
// Inject a visible text block into content so Claude Code renders it.
if (Array.isArray(parsed.content)) {
const lines = [
`╭─ Lynkr ${'─'.repeat(40)}`,
`│ Tier ${interaction.tier ?? '—'} → ${interaction.model ?? '—'} (${interaction.provider ?? '—'})`,
`│ Score ${interaction.complexity_score ?? '—'}/100 · Risk: ${interaction.risk ?? '—'} · Savings: ~${interaction.estimated_savings_percent ?? 0}%`,
`│ Route ${interaction.mode ?? '—'} — ${interaction.headline ?? ''}`,
`╰${'─'.repeat(46)}`,
];
parsed.content.unshift({ type: 'text', text: lines.join('\n') });
}
finalBody = JSON.stringify(parsed);
}
}
} catch (err) {
logger.debug({ err: err.message }, '[Router] Skipped interaction injection (non-JSON body)');
}
}
metrics.recordResponse(result.status);
res.status(result.status).send(finalBody);
} catch (error) {
next(error);
}
});
// List available agents (must come before parameterized routes)
// Replies { agents } with whatever listAgents() returns; 500 { error } on failure.
router.get("/v1/agents", (_req, res) => {
  try {
    const { listAgents } = require("../agents");
    res.json({ agents: listAgents() });
  } catch (listError) {
    const { message } = listError;
    res.status(500).json({ error: message });
  }
});
// Agent stats endpoint (specific path before parameterized)
// Replies { stats } with whatever getAgentStats() returns; 500 { error } on failure.
router.get("/v1/agents/stats", (_req, res) => {
  try {
    const { getAgentStats } = require("../agents");
    res.json({ stats: getAgentStats() });
  } catch (statsError) {
    const { message } = statsError;
    res.status(500).json({ error: message });
  }
});
// Read agent transcript (specific path with param before catch-all)
// 404 when the context manager has no transcript for the agent id.
router.get("/v1/agents/:agentId/transcript", (req, res) => {
  try {
    const ContextManager = require("../agents/context-manager");
    const contextManager = new ContextManager();
    const transcript = contextManager.readTranscript(req.params.agentId);
    if (transcript) {
      res.json({ transcript });
      return;
    }
    res.status(404).json({ error: "Transcript not found" });
  } catch (transcriptError) {
    res.status(500).json({ error: transcriptError.message });
  }
});
// Agent execution details (parameterized - must come last)
// 404 when no execution matches the id; otherwise the details object is the response body.
router.get("/v1/agents/:executionId", (req, res) => {
  try {
    const { getAgentExecution } = require("../agents");
    const execution = getAgentExecution(req.params.executionId);
    if (execution) {
      res.json(execution);
      return;
    }
    res.status(404).json({ error: "Execution not found" });
  } catch (executionError) {
    res.status(500).json({ error: executionError.message });
  }
});
// Token usage statistics for a session
// Replies with a rounded summary plus a per-turn breakdown; 404 for unknown sessions.
router.get("/api/sessions/:sessionId/tokens", (req, res) => {
  // Rounds through toFixed and parses the result back into a number.
  const roundTo = (value, digits) => parseFloat(value.toFixed(digits));

  // Shapes one breakdown entry for the response.
  const describeTurn = (entry) => ({
    turn: entry.turn,
    timestamp: entry.timestamp,
    model: entry.model,
    estimated: entry.estimated.total,
    actual: {
      input: entry.actual.inputTokens,
      output: entry.actual.outputTokens,
      cached: entry.actual.cacheReadTokens,
      total: entry.actual.totalTokens
    },
    cost: roundTo(entry.cost.total, 6)
  });

  try {
    const tokens = require("../utils/tokens");
    const { sessionId } = req.params;
    const session = getSession(sessionId);
    if (!session) {
      res.status(404).json({ error: "Session not found" });
      return;
    }

    const sessionStats = tokens.getSessionTokenStats(session);
    const summary = {
      turns: sessionStats.turns,
      totalTokens: sessionStats.totalTokens,
      totalCost: roundTo(sessionStats.totalCost, 4),
      averageTokensPerTurn: sessionStats.averageTokensPerTurn,
      cacheHitRate: `${parseFloat(sessionStats.cacheHitRate)}%`
    };
    const breakdown = sessionStats.breakdown.map((entry) => describeTurn(entry));

    res.json({ sessionId, stats: summary, breakdown });
  } catch (tokenError) {
    res.status(500).json({ error: tokenError.message });
  }
});
// Global token usage statistics (all sessions)
// Only sessions with at least one turn contribute to the totals and averages.
router.get("/api/tokens/stats", (_req, res) => {
  try {
    const tokens = require("../utils/tokens");
    const { getAllSessions } = require("../sessions");
    const sessions = getAllSessions();

    const tally = { sessions: 0, turns: 0, tokens: 0, cost: 0 };
    for (const session of sessions) {
      const sessionStats = tokens.getSessionTokenStats(session);
      if (!(sessionStats.turns > 0)) {
        continue;
      }
      tally.tokens += sessionStats.totalTokens;
      tally.cost += sessionStats.totalCost;
      tally.turns += sessionStats.turns;
      tally.sessions += 1;
    }

    // Averages are reported as 0 when there is nothing to divide by.
    const perTurn = tally.turns > 0 ? Math.round(tally.tokens / tally.turns) : 0;
    const perSession = tally.sessions > 0 ? Math.round(tally.tokens / tally.sessions) : 0;

    res.json({
      global: {
        sessions: tally.sessions,
        turns: tally.turns,
        totalTokens: tally.tokens,
        totalCost: parseFloat(tally.cost.toFixed(4)),
        averageTokensPerTurn: perTurn,
        averageTokensPerSession: perSession
      }
    });
  } catch (statsError) {
    res.status(500).json({ error: statsError.message });
  }
});
// Mount OpenAI-compatible endpoints for Cursor IDE support
// (registered after the /v1 routes defined earlier in this file, so those are matched first).
router.use("/v1", openaiRouter);
// Mount Anthropic-compatible provider discovery endpoints (cc-relay style)
// These provide /v1/models and /v1/providers for Claude Code CLI compatibility
// Shares the /v1 prefix with openaiRouter; it is mounted second, so openaiRouter sees a request first.
router.use("/v1", providersRouter);
// Headroom compression endpoints
// GET /metrics/compression — replies with the combined metrics from the headroom module.
router.get("/metrics/compression", async (_req, res) => {
  try {
    const { getCombinedMetrics } = require("../headroom");
    const combinedMetrics = await getCombinedMetrics();
    res.json(combinedMetrics);
  } catch (metricsError) {
    res.status(500).json({ error: metricsError.message });
  }
});
// GET /metrics/tool-compression — replies with the tool-result compressor's metrics.
router.get("/metrics/tool-compression", (_req, res) => {
  const { getMetrics } = require("../context/tool-result-compressor");
  const compressionMetrics = getMetrics();
  res.json(compressionMetrics);
});
// GET /tee/:id — serves a stored tee entry as plain text; 404 when teeGet yields nothing.
router.get("/tee/:id", (req, res) => {
  const { teeGet } = require("../context/tool-result-compressor");
  const entry = teeGet(req.params.id);
  if (entry) {
    res.type("text/plain").send(entry);
    return;
  }
  res.status(404).json({ error: "Tee entry not found or expired" });
});
// GET /health/headroom — 200 when the manager reports healthy, 503 otherwise.
router.get("/health/headroom", async (_req, res) => {
  try {
    const { getHeadroomManager } = require("../headroom");
    const headroom = getHeadroomManager();
    const report = await headroom.getHealth();
    const statusCode = report.healthy ? 200 : 503;
    res.status(statusCode).json(report);
  } catch (healthError) {
    res.status(500).json({ error: healthError.message });
  }
});
// GET /headroom/status — replies with the manager's detailed status.
router.get("/headroom/status", async (_req, res) => {
  try {
    const { getHeadroomManager } = require("../headroom");
    const headroom = getHeadroomManager();
    const detailedStatus = await headroom.getDetailedStatus();
    res.json(detailedStatus);
  } catch (statusError) {
    res.status(500).json({ error: statusError.message });
  }
});
// POST /headroom/restart — restarts via the manager and echoes its result after `success: true`.
router.post("/headroom/restart", async (_req, res) => {
  try {
    const { getHeadroomManager } = require("../headroom");
    const headroom = getHeadroomManager();
    const outcome = await headroom.restart();
    // Result fields are spread last, so a `success` key in the result overrides the default.
    res.json({ success: true, ...outcome });
  } catch (restartError) {
    res.status(500).json({ error: restartError.message });
  }
});
// GET /headroom/logs — returns the manager's logs as plain text.
//
// Query parameters:
//   tail - number of log lines to request (default 100).
//
// Replies 400 when the manager returns null for the logs, and 500 { error }
// when it throws.
router.get("/headroom/logs", async (req, res) => {
  try {
    const { getHeadroomManager } = require("../headroom");
    const manager = getHeadroomManager();

    // A non-numeric `tail` used to reach getLogs as NaN; fall back to the
    // default of 100 instead.
    const requestedTail = Number.parseInt(req.query.tail || "100", 10);
    const tail = Number.isNaN(requestedTail) ? 100 : requestedTail;

    const logs = await manager.getLogs(tail);
    if (logs === null) {
      return res.status(400).json({ error: "Docker management is disabled" });
    }
    res.type("text/plain").send(logs);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});
module.exports = router;