lynkr
Version:
Self-hosted LLM gateway and tier-routing proxy for Claude Code, Cursor, and Codex. Routes across Ollama, AWS Bedrock, OpenRouter, Databricks, Azure OpenAI, llama.cpp, and LM Studio with prompt caching, MCP tools, and 60-80% cost savings.
136 lines (123 loc) • 4.56 kB
JavaScript
/**
* Request Bypass
*
* Short-circuits Claude Code CLI housekeeping requests that don't need a real
* model call:
* - Title prefill (the CLI seeds an assistant turn with just "{")
* - Topic/title extraction (the CLI asks for {"isNewTopic":..,"title":..})
* - Single-message "Warmup" / "count" probes, e.g. the "Warmup" ping the CLI
*   sends to prime a connection
*
* Returning a canned response here saves a full provider round-trip (latency
* and tokens) on every session. Inspired by 9router's bypassHandler.
*
* Always on — only ever returns a canned response for unambiguous Claude CLI
* housekeeping traffic, never for real work.
*
* @module orchestrator/bypass
*/
const logger = require("../logger");
/**
 * Collapse Anthropic message content into a single plain-text string.
 *
 * @param {*} content - Either a raw string or an array of content blocks.
 * @returns {string} The string unchanged; for an array, the `text` of every
 *   `type: "text"` block (with a string `text`) joined by single spaces;
 *   "" for any other input.
 */
function getText(content) {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  const pieces = [];
  for (const block of content) {
    // Skip null entries, non-text blocks, and text blocks with no string payload.
    if (block && block.type === "text" && typeof block.text === "string") {
      pieces.push(block.text);
    }
  }
  return pieces.join(" ");
}
/**
 * Collapse the top-level Anthropic `system` field into plain text.
 *
 * @param {*} system - Either a raw string or an array of system blocks.
 * @returns {string} The string unchanged; for an array, the `text` of every
 *   `type: "text"` block (with a string `text`) joined by single spaces;
 *   "" for any other input.
 */
function getSystemText(system) {
  if (Array.isArray(system)) {
    const isTextBlock = (entry) =>
      Boolean(entry) && entry.type === "text" && typeof entry.text === "string";
    return system
      .filter(isTextBlock)
      .map(({ text }) => text)
      .join(" ");
  }
  return typeof system === "string" ? system : "";
}
/**
 * Decide whether a request is a bypassable Claude CLI housekeeping call.
 *
 * Only requests whose user-agent contains "claude-cli" are considered. The
 * patterns are checked in order and the first match wins:
 *   1. title prefill    - the last message is an assistant turn whose text is
 *                         just "{"
 *   2. title extraction - the system prompt mentions "isNewTopic"
 *   3. warmup / count   - exactly one user message whose text is "Warmup" or
 *                         "count"
 *
 * Malformed input (missing arguments, null headers, null message entries, a
 * text block without a string `text`) never throws; it simply does not match.
 *
 * @param {object} args
 * @param {object} args.payload - The Anthropic request body.
 * @param {object} [args.headers] - Lowercased request headers.
 * @returns {{kind: string, text: string}|null} bypass descriptor or null.
 */
function detectBypass({ payload, headers = {} } = {}) {
  if (!payload || !Array.isArray(payload.messages) || payload.messages.length === 0) {
    return null;
  }

  // Only bypass Claude CLI traffic — other clients use these endpoints for
  // real work and must never receive a canned response.
  // The `= {}` default only covers `undefined`, so guard explicit null too.
  const ua = String(headers?.["user-agent"] || "").toLowerCase();
  if (!ua.includes("claude-cli")) return null;

  const messages = payload.messages;
  const lastMsg = messages[messages.length - 1];

  // Pattern 1: Title prefill — the CLI seeds an assistant turn with just "{"
  // to coax a JSON object out of the model.
  if (lastMsg?.role === "assistant") {
    const { content } = lastMsg;
    let prefill = "";
    if (Array.isArray(content)) {
      const firstBlock = content[0];
      // A text block may lack a string `text`; treat that as "no prefill"
      // instead of calling .trim() on a non-string.
      if (firstBlock?.type === "text" && typeof firstBlock.text === "string") {
        prefill = firstBlock.text;
      }
    } else if (typeof content === "string") {
      prefill = content;
    }
    if (prefill.trim() === "{") {
      return { kind: "title_prefill", text: "{}" };
    }
  }

  // Pattern 2: Topic/title extraction — system prompt asks for isNewTopic.
  // Synthesize a title from the first user message instead of calling a model.
  const systemText = getSystemText(payload.system);
  if (systemText.includes("isNewTopic")) {
    // `m?.role` tolerates null/undefined entries in the messages array.
    const userMsg = messages.find((m) => m?.role === "user");
    const userText = getText(userMsg?.content).trim();
    // Title = first three whitespace-separated words ("" when there is no text).
    const title = userText.split(/\s+/).filter(Boolean).slice(0, 3).join(" ");
    return {
      kind: "title_extraction",
      text: JSON.stringify({ isNewTopic: true, title }),
    };
  }

  // Pattern 3: Warmup / count probes — a single short user message.
  if (messages.length === 1 && messages[0]?.role === "user") {
    const firstText = getText(messages[0].content).trim();
    if (firstText === "Warmup" || firstText === "count") {
      return { kind: firstText.toLowerCase(), text: "OK" };
    }
  }

  return null;
}
/**
 * Wrap a bypass descriptor in the `{ status, body, terminationReason }`
 * envelope returned from this module, and log that the request was
 * short-circuited. The header comment of the original notes this is the
 * contract the router consumes (same shape as the prompt-cache early returns).
 *
 * The body is an Anthropic-style assistant message carrying the canned text,
 * plus a `lynkr_bypass` marker recording which pattern fired.
 *
 * @param {{kind: string, text: string}} bypass
 * @param {string} model - Model id to echo back; a placeholder id is used
 *   when this is falsy.
 * @returns {{status: number, body: object, terminationReason: string}}
 */
function buildBypassResponse(bypass, model) {
  const { kind, text } = bypass;
  logger.info({ kind }, "[Bypass] Short-circuiting CLI housekeeping request");

  const body = {
    // Millisecond timestamp keeps the id shaped like a message id.
    id: `msg_bypass_${Date.now()}`,
    type: "message",
    role: "assistant",
    content: [{ type: "text", text }],
    model: model || "claude-3-unknown",
    stop_reason: "end_turn",
    stop_sequence: null,
    // Nominal token counts: no model was actually invoked.
    usage: { input_tokens: 1, output_tokens: 1 },
    lynkr_bypass: { kind },
  };

  return {
    status: 200,
    body,
    terminationReason: `bypass_${kind}`,
  };
}
// Public API: the request detector and the canned-response builder.
// getText / getSystemText stay module-private.
module.exports = {
detectBypass,
buildBypassResponse,
};