UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

1,149 lines 165 kB
/** * Claude-Compatible Proxy Routes * * Exposes Anthropic-compatible /v1/messages, /v1/models, and /v1/messages/count_tokens * endpoints. ALL requests are routed through ctx.neurolink.generate() / ctx.neurolink.stream() * -- no direct HTTP calls to Anthropic. * * An optional ModelRouter can remap incoming model names to different * provider/model pairs (e.g. "claude-sonnet-4-20250514" -> vertex/gemini-2.5-pro). * Without a router, models are passed through to the Anthropic provider. */ import { access, readFile } from "node:fs/promises"; import { homedir } from "node:os"; import { join } from "node:path"; import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js"; import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js"; import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js"; import { tracers } from "../../telemetry/tracers.js"; import { withSpan } from "../../telemetry/withSpan.js"; import { ProxyTracer, recordFallbackAttempt } from "../../proxy/proxyTracer.js"; import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js"; import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js"; import { createSSEInterceptor } from "../../proxy/sseInterceptor.js"; import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js"; import { buildProxyTranslationPlan, parseRetryAfterMs, } from "../../proxy/routingPolicy.js"; import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js"; import { recordAttempt, recordAttemptError, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js"; import { logger } from "../../utils/logger.js"; import { ProviderHealthChecker } from "../../utils/providerHealth.js"; // 
--------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- /** Headers that must never be forwarded upstream to Anthropic. */ const BLOCKED_UPSTREAM_HEADERS = new Set([ "cookie", "proxy-authorization", "host", "connection", "content-length", "transfer-encoding", ]); // --------------------------------------------------------------------------- // Module-level state // --------------------------------------------------------------------------- /** Fill-first: index of the current primary account. Only advances when * the current account exhausts 429 retries or auth retries fail. */ let primaryAccountIndex = 0; /** Track account count so we can reset primaryAccountIndex when it changes. */ let lastKnownAccountCount = 0; /** Stable account key (e.g. "anthropic:user@example.com") of the configured * home/primary account. Set once at proxy boot from routing.primaryAccount. * When undefined, home semantics fall back to enabledAccounts[0] (insertion * order) — preserves pre-existing behavior. */ let configuredPrimaryAccountKey; const MAX_AUTH_RETRIES = 5; const MAX_CONSECUTIVE_REFRESH_FAILURES = 15; const MAX_TRANSIENT_SAME_ACCOUNT_RETRIES = 2; const TRANSIENT_SAME_ACCOUNT_RETRY_DELAYS_MS = [250, 1_000]; /** Maximum upstream 429 attempts per account before rotating to the next account. * Total attempts per account = this + 1 (the initial call plus this many retries). */ const MAX_RATE_LIMIT_SAME_ACCOUNT_RETRIES = 10; /** Max time to sleep between 429 retries. Caps large upstream retry-after values * so we don't hold the client connection open for minutes. */ const MAX_RATE_LIMIT_RETRY_DELAY_MS = 30_000; /** Timeout for upstream requests to Anthropic. Must be generous enough * to cover the full lifecycle of streaming responses, including extended * thinking from Opus models (which can exceed 5 minutes for large contexts). 
*/ const UPSTREAM_FETCH_TIMEOUT_MS = 15 * 60 * 1000; // 15 minutes const accountRuntimeState = new Map(); /** Track whether we've run the one-time startup prune. */ let startupPruneDone = false; /** Default cooling period when retries are exhausted and upstream didn't * provide a retry-after header. Short enough to recover quickly, long * enough to avoid immediately hammering the same account. */ const DEFAULT_COOLING_PERIOD_MS = 60_000; /** Advance the primary account index when the current primary is exhausted * (429 retries exhausted or auth failure). This is what makes fill-first work: * we stick to one account until it's unusable. Only advances when the exhausted * account IS the current primary; otherwise it's already a fallback. */ function advancePrimaryIfCurrent(accountKey, enabledCount, primaryAccountKey) { if (enabledCount <= 1) { return; } // Only advance if the cooled account is the current primary if (accountKey !== primaryAccountKey) { return; } primaryAccountIndex = (primaryAccountIndex + 1) % enabledCount; } /** Resolve the configured primary's stable key to its current index in the * request's enabledAccounts list. Returns 0 (insertion-order fallback) when * no key is configured or the key cannot be matched (account disabled/ * removed). The resolution is per-request because enabledAccounts membership * can shift between requests. */ function resolveHomeIndex(enabledAccounts) { if (!configuredPrimaryAccountKey) { return 0; } const idx = enabledAccounts.findIndex((a) => a.key === configuredPrimaryAccountKey); return idx >= 0 ? idx : 0; } /** If the configured home primary is no longer cooling, reset * primaryAccountIndex back to its index so traffic returns to the preferred * account once its rate limit window expires. Called at the start of each * request. Home is resolved fresh per call via resolveHomeIndex. 
*/ function maybeResetPrimaryToHome(enabledAccounts) { if (enabledAccounts.length <= 1) { return; } const homeIndex = resolveHomeIndex(enabledAccounts); if (primaryAccountIndex === homeIndex) { return; } const homeAccount = enabledAccounts[homeIndex]; const homeState = accountRuntimeState.get(homeAccount.key); if (!homeState || !homeState.coolingUntil || Date.now() >= homeState.coolingUntil) { // Home account is no longer cooling — reset to it primaryAccountIndex = homeIndex; if (homeState?.coolingUntil) { homeState.coolingUntil = undefined; logger.always(`[proxy] home primary account=${homeAccount.label} cooling expired, resetting primaryAccountIndex to ${homeIndex}`); } } } /** Check if an account is currently in its cooling window. */ function isAccountCooling(accountKey) { const state = accountRuntimeState.get(accountKey); return !!state?.coolingUntil && Date.now() < state.coolingUntil; } // --------------------------------------------------------------------------- // OAuth polyfill helpers (extracted to reduce block nesting) // --------------------------------------------------------------------------- const snapshotCache = new Map(); const SNAPSHOT_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes const SNAPSHOT_STABLE_HEADERS = new Set([ "accept", "accept-encoding", "accept-language", "anthropic-beta", "anthropic-dangerous-direct-browser-access", "anthropic-version", "sec-fetch-mode", "user-agent", "x-app", "x-stainless-arch", "x-stainless-lang", "x-stainless-os", "x-stainless-package-version", "x-stainless-retry-count", "x-stainless-runtime", "x-stainless-runtime-version", "x-stainless-timeout", "x-subscription-tier", ]); const NON_CLAUDE_OAUTH_BETAS = [ "oauth-2025-04-20", "claude-code-20250219", "fine-grained-tool-streaming-2025-05-14", ]; function getSnapshotSafeLabel(accountLabel) { return accountLabel.replace(/[^a-zA-Z0-9._@-]/g, "_"); } function getSnapshotPath(accountLabel) { return join(homedir(), ".neurolink", "header-snapshots", 
`anthropic_${getSnapshotSafeLabel(accountLabel)}.json`); } function applySnapshotHeaders(headers, snapshot) { if (!snapshot?.headers) { return; } for (const [sk, sv] of Object.entries(snapshot.headers)) { const lower = sk.toLowerCase(); if (typeof sv === "string" && !headers[lower] && !BLOCKED_UPSTREAM_HEADERS.has(lower) && lower !== "authorization" && lower !== "x-api-key" && lower !== "x-claude-code-session-id") { headers[lower] = sv; } } } async function loadClaudeSnapshot(accountLabel) { try { const safeLabel = getSnapshotSafeLabel(accountLabel); const cached = snapshotCache.get(safeLabel); if (cached && Date.now() - cached.loadedAt < SNAPSHOT_CACHE_TTL_MS) { return cached.snapshot; } const snapshotPath = getSnapshotPath(accountLabel); try { await access(snapshotPath); } catch { return null; } const snapshot = JSON.parse(await readFile(snapshotPath, "utf8")); if (!snapshot || typeof snapshot !== "object") { return null; } const normalized = { accountKey: "accountKey" in snapshot && typeof snapshot.accountKey === "string" ? snapshot.accountKey : `anthropic:${accountLabel}`, capturedAt: "capturedAt" in snapshot && typeof snapshot.capturedAt === "string" ? snapshot.capturedAt : new Date(0).toISOString(), source: "claude-code", headers: "headers" in snapshot && snapshot.headers ? snapshot.headers : {}, ...(snapshot.body ? { body: snapshot.body } : {}), }; if (Object.keys(normalized.headers).length === 0 && Object.keys(normalized.body ?? {}).length === 0) { return null; } snapshotCache.set(safeLabel, { snapshot: normalized, loadedAt: Date.now(), }); return normalized; } catch { return null; } } function buildSnapshotHeaders(headers, existingHeaders) { const merged = { ...(existingHeaders ?? 
{}) }; for (const [key, value] of Object.entries(headers)) { const lower = key.toLowerCase(); if (typeof value === "string" && SNAPSHOT_STABLE_HEADERS.has(lower) && !BLOCKED_UPSTREAM_HEADERS.has(lower) && lower !== "authorization" && lower !== "x-api-key" && lower !== "x-claude-code-session-id") { merged[lower] = value; } } return merged; } function extractSnapshotBody(body) { if (!body || typeof body !== "object") { return undefined; } const parsed = body; const identity = parseClaudeCodeUserId(parsed.metadata?.user_id); const systemBlocks = Array.isArray(parsed.system) ? parsed.system : typeof parsed.system === "string" ? [{ type: "text", text: parsed.system }] : []; const billingHeader = systemBlocks.find((block) => typeof block?.text === "string" && block.text.includes("x-anthropic-billing-header"))?.text; const agentBlock = systemBlocks.find((block) => typeof block?.text === "string" && block.text.includes("Claude Agent SDK"))?.text; if (!identity && !billingHeader && !agentBlock) { return undefined; } return { ...(identity ? { metadataUserId: identity.metadataUserId } : {}), ...(identity ? { sessionId: identity.sessionId } : {}), ...(billingHeader ? { billingHeader } : {}), ...(agentBlock ? { agentBlock } : {}), }; } function isLikelyClaudeClient(headers, snapshotBody) { return (typeof headers["x-claude-code-session-id"] === "string" || headers["user-agent"]?.startsWith("claude-cli/") || !!snapshotBody?.metadataUserId || !!snapshotBody?.billingHeader || !!snapshotBody?.agentBlock); } function snapshotsMatch(existing, next) { if (!existing) { return false; } return (JSON.stringify(existing.headers ?? {}) === JSON.stringify(next.headers ?? {}) && JSON.stringify(existing.body ?? {}) === JSON.stringify(next.body ?? 
{})); } async function persistClaudeSnapshot(accountLabel, snapshot) { const snapshotPath = getSnapshotPath(accountLabel); await writeJsonSnapshotAtomically(snapshotPath, snapshot, 0o600); snapshotCache.set(getSnapshotSafeLabel(accountLabel), { snapshot, loadedAt: Date.now(), }); } async function maybeRefreshClaudeSnapshot(accountLabel, accountKey, headers, bodyStr) { const existing = await loadClaudeSnapshot(accountLabel); let parsedBody; try { parsedBody = JSON.parse(bodyStr); } catch { return existing; } const body = extractSnapshotBody(parsedBody); if (!isLikelyClaudeClient(headers, body)) { return existing; } const next = { accountKey, capturedAt: new Date().toISOString(), source: "claude-code", headers: buildSnapshotHeaders(headers, existing?.headers), body: { ...(existing?.body ?? {}), ...(body ?? {}), ...(typeof headers["x-claude-code-session-id"] === "string" ? { sessionId: headers["x-claude-code-session-id"] } : {}), }, }; if (snapshotsMatch(existing, next)) { return existing; } try { await persistClaudeSnapshot(accountLabel, next); } catch (error) { logger.warn("[proxy] failed to persist Claude snapshot", { accountLabel, error: error instanceof Error ? error.message : String(error), }); snapshotCache.set(getSnapshotSafeLabel(accountLabel), { snapshot: next, loadedAt: Date.now(), }); } return next; } /** * Polyfill the request body for OAuth accounts. * Claude Code injects a billing header, agent block, and metadata.user_id * into the body. Non-CC clients (Curator, custom apps) don't send these — * Anthropic rejects without them. */ function polyfillOAuthBody(bodyStr, accountToken, snapshot, preferredSessionId) { try { const parsed = JSON.parse(bodyStr); // Billing header block (required by Anthropic for OAuth) // NOTE: This block MUST be deterministic (no random values) to preserve // Anthropic's prompt caching prefix chain. We keep the real Claude Code // version/entrypoint shape when present, but stabilize the volatile cch. 
const agentBlock = { type: "text", text: snapshot?.body?.agentBlock || "You are a Claude agent, built on Anthropic's Claude Agent SDK.", }; // Normalise system to array and APPEND billing + agent blocks. // IMPORTANT: We append (not prepend) to preserve the client's cache // prefix chain. Anthropic's prompt caching uses prefix matching — if we // insert anything before the client's system blocks, we invalidate all // cached content (tools, system prompt, message history). // // Claude Code sends a billing block with a `cch=<hash>` value that changes // on every request. We fix this by: // 1. Removing the client's billing block from its current position // 2. Stabilizing it while keeping the official Claude Code shape // 3. Appending it at the END so the cacheable system blocks stay // at the front of the prefix chain if (parsed.system) { if (typeof parsed.system === "string") { parsed.system = [{ type: "text", text: parsed.system }]; } if (Array.isArray(parsed.system)) { // Find and remove existing billing/agent blocks from wherever // the client placed them (typically at system[0]) const billingIdx = parsed.system.findIndex((b) => typeof b.text === "string" && b.text.includes("x-anthropic-billing-header")); const agentIdx = parsed.system.findIndex((b) => typeof b.text === "string" && b.text.includes("Claude Agent SDK")); const billingBlock = { type: "text", text: buildStableClaudeCodeBillingHeader(parsed.system[billingIdx]?.text ?? snapshot?.body?.billingHeader), }; // Remove in reverse index order so indices stay valid const indicesToRemove = [billingIdx, agentIdx] .filter((i) => i >= 0) .sort((a, b) => b - a); for (const idx of indicesToRemove) { parsed.system.splice(idx, 1); } // Always append a deterministic billing block at the end. // If the client sent one, we stripped its dynamic cch= and use // our stable version instead. If not, we add ours. 
parsed.system = [...parsed.system, billingBlock, agentBlock]; } } else { const billingBlock = { type: "text", text: buildStableClaudeCodeBillingHeader(snapshot?.body?.billingHeader), }; parsed.system = [billingBlock, agentBlock]; } // Inject Claude-Code-shaped metadata.user_id (required for OAuth). const tokenPrefix = accountToken.substring(0, Math.min(20, accountToken.length)); const identity = getOrCreateClaudeCodeIdentity(tokenPrefix, { existingUserId: parsed.metadata?.user_id ?? snapshot?.body?.metadataUserId, preferredSessionId: preferredSessionId ?? snapshot?.body?.sessionId, }); parsed.metadata = { ...parsed.metadata, user_id: identity.metadataUserId, }; return { bodyStr: JSON.stringify(parsed), sessionId: identity.sessionId }; } catch { return { bodyStr }; // JSON parse failed — use original body } } // --------------------------------------------------------------------------- // Legacy credential refresh helper (extracted to reduce block nesting) // --------------------------------------------------------------------------- async function tryLoadLegacyAccount(creds, legacyCredPath) { if (!creds.oauth?.accessToken) { return undefined; } let legacyToken = creds.oauth.accessToken; let legacyRefresh = creds.oauth.refreshToken; let legacyExpiry = creds.oauth.expiresAt; const legacyExpired = legacyExpiry ? legacyExpiry < Date.now() : false; if (!legacyExpired) { return { key: "anthropic:legacy-default", label: "default", token: legacyToken, refreshToken: legacyRefresh, expiresAt: legacyExpiry, type: "oauth", persistTarget: { credPath: legacyCredPath }, }; } if (!legacyRefresh) { logger.always("[proxy] skipping legacy account (expired, no refresh token)"); return undefined; } const tmp = { token: legacyToken, refreshToken: legacyRefresh, expiresAt: legacyExpiry, label: "default", }; const ok = await refreshToken(tmp); if (!ok.success) { logger.always(`[proxy] skipping legacy account (expired, refresh failed: ${ok.error?.slice(0, 200) ?? 
"unknown"})`); return undefined; } legacyToken = tmp.token; legacyRefresh = tmp.refreshToken; legacyExpiry = tmp.expiresAt; await persistTokens(legacyCredPath, tmp); logger.always("[proxy] refreshed legacy account at startup"); return { key: "anthropic:legacy-default", label: "default", token: legacyToken, refreshToken: legacyRefresh, expiresAt: legacyExpiry, type: "oauth", persistTarget: { credPath: legacyCredPath }, }; } async function handleTranslatedClaudeRequest(args) { const { ctx, body, route, modelRouter, tracer, requestStartTime, logProxyBody, } = args; tracer?.setMode("full"); const parsed = parseClaudeRequest(body); const plan = buildProxyTranslationPlan({ provider: route.provider, model: route.model, }, modelRouter?.getFallbackChain() ?? [], body.model, parsed); logProxyRoutingPlan(logProxyBody, "translated_request", plan); const attempts = plan.attempts; if (body.stream) { return handleTranslatedClaudeStreamRequest({ ctx, body, attempts, parsed, tracer, requestStartTime, }); } return handleTranslatedClaudeJsonRequest({ ctx, body, attempts, parsed, tracer, requestStartTime, logProxyBody, }); } function logProxyRoutingPlan(logProxyBody, stage, plan) { logProxyBody({ phase: "routing_decision", contentType: "application/json", body: { stage, attempts: plan.attempts, }, }); } async function handleTranslatedClaudeStreamRequest(args) { const { ctx, body, attempts, parsed, tracer, requestStartTime } = args; const serializer = new ClaudeStreamSerializer(body.model, 0); const KEEPALIVE_INTERVAL_MS = 15_000; const encoder = new TextEncoder(); let translationKeepAliveTimer; let translationCancelled = false; let translationSucceeded = false; let translatedModel; let finalStreamError = "No translation providers succeeded"; let upstreamIterator; const translationStream = new ReadableStream({ async start(controller) { for (const frame of serializer.start()) { controller.enqueue(encoder.encode(frame)); } translationKeepAliveTimer = setInterval(() => { try { 
controller.enqueue(encoder.encode(": keep-alive\n\n")); } catch { // Controller already closed. } }, KEEPALIVE_INTERVAL_MS); try { for (let attemptIndex = 0; attemptIndex < attempts.length; attemptIndex++) { const attempt = attempts[attemptIndex]; if (attemptIndex > 0) { logger.always(`[proxy] fallback → ${attempt.label}`); } let collectedText = ""; try { const options = buildProxyFallbackOptions(parsed, attempt.provider ? { provider: attempt.provider, model: attempt.model, } : {}); const streamResult = await ctx.neurolink.stream(options); const iterable = streamResult.stream; upstreamIterator = iterable[Symbol.asyncIterator](); while (true) { if (translationCancelled) { break; } const { value: chunk, done } = await upstreamIterator.next(); if (done || translationCancelled) { break; } const text = extractText(chunk); if (text) { collectedText += text; for (const frame of serializer.pushDelta(text)) { controller.enqueue(encoder.encode(frame)); } } } const toolCalls = streamResult.toolCalls ?? []; if (!hasTranslatedOutput(collectedText, toolCalls)) { finalStreamError = `Translated provider ${attempt.label} returned no content or tool calls`; logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`); continue; } if (!translationCancelled && toolCalls.length) { for (const toolCall of toolCalls) { const toolName = toolCall.toolName ?? toolCall.name ?? "unknown"; for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) { controller.enqueue(encoder.encode(frame)); } } } if (!translationCancelled) { const reason = streamResult.finishReason ?? 
"end_turn"; const resolvedUsage = extractUsageFromStreamResult(streamResult.usage); for (const frame of serializer.finish(resolvedUsage.output, reason)) { controller.enqueue(encoder.encode(frame)); } } translatedModel = streamResult.model; translationSucceeded = true; return; } catch (streamErr) { if (translationCancelled) { return; } finalStreamError = streamErr instanceof Error ? streamErr.message : String(streamErr); if (collectedText.trim().length > 0) { logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`); const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`; controller.enqueue(encoder.encode(errorEvent)); return; } logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${finalStreamError}`); } } if (!translationCancelled) { logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`); const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`; controller.enqueue(encoder.encode(errorEvent)); } } finally { if (translationKeepAliveTimer) { clearInterval(translationKeepAliveTimer); } if (!translationCancelled) { controller.close(); } if (tracer && translatedModel && translatedModel !== body.model) { tracer.setModelSubstitution(body.model, translatedModel); } if (!translationSucceeded) { tracer?.setError("generation_error", finalStreamError.slice(0, 500)); } tracer?.end(200, Date.now() - requestStartTime); } }, cancel() { translationCancelled = true; if (translationKeepAliveTimer) { clearInterval(translationKeepAliveTimer); translationKeepAliveTimer = undefined; } if (upstreamIterator?.return) { upstreamIterator.return(undefined).catch((cancelErr) => { logger.debug(`[proxy] upstream cancel error: ${cancelErr instanceof Error ? 
cancelErr.message : String(cancelErr)}`); }); } }, }); return new Response(translationStream, { headers: { "content-type": "text/event-stream", "cache-control": "no-cache", connection: "keep-alive", }, }); } async function handleTranslatedClaudeJsonRequest(args) { const { ctx, body, attempts, parsed, tracer, requestStartTime, logProxyBody, } = args; let lastAttemptError = "No translation providers succeeded"; for (let attemptIndex = 0; attemptIndex < attempts.length; attemptIndex++) { const attempt = attempts[attemptIndex]; if (attemptIndex > 0) { logger.always(`[proxy] fallback → ${attempt.label}`); } try { const options = buildProxyFallbackOptions(parsed, attempt.provider ? { provider: attempt.provider, model: attempt.model, } : {}); const streamResult = await ctx.neurolink.stream(options); let collectedText = ""; for await (const chunk of streamResult.stream) { const text = extractText(chunk); if (text) { collectedText += text; } } if (!hasTranslatedOutput(collectedText, streamResult.toolCalls)) { lastAttemptError = `Translated provider ${attempt.label} returned no content or tool calls`; logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`); continue; } const internal = { content: collectedText, model: streamResult.model, finishReason: streamResult.finishReason ?? "end_turn", reasoning: undefined, usage: streamResult.usage ? 
extractUsageFromStreamResult(streamResult.usage) : undefined, toolCalls: streamResult.toolCalls, }; if (tracer && streamResult.model && streamResult.model !== body.model) { tracer.setModelSubstitution(body.model, streamResult.model); } tracer?.end(200, Date.now() - requestStartTime); const clientResponse = serializeClaudeResponse(internal, body.model); const clientResponseText = JSON.stringify(clientResponse); logProxyBody({ phase: "client_response", headers: { "content-type": "application/json" }, body: clientResponseText, bodySize: Buffer.byteLength(clientResponseText, "utf8"), contentType: "application/json", responseStatus: 200, durationMs: Date.now() - requestStartTime, }); return clientResponse; } catch (attemptError) { lastAttemptError = attemptError instanceof Error ? attemptError.message : String(attemptError); logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${lastAttemptError}`); } } throw new Error(lastAttemptError); } async function handleClaudePassthroughRequest(args) { const { ctx, body, clientRequestBody, tracer, requestStartTime, logProxyBody, } = args; tracer?.setMode("passthrough-cli"); const bodyStr = clientRequestBody; const toolCount = Array.isArray(body.tools) ? body.tools.length : 0; const upstreamHeaders = {}; for (const [key, value] of Object.entries(ctx.headers)) { if (!BLOCKED_UPSTREAM_HEADERS.has(key.toLowerCase()) && value) { upstreamHeaders[key] = value; } } if (!upstreamHeaders["content-type"]) { upstreamHeaders["content-type"] = "application/json"; } const upstreamSpan = tracer?.startUpstreamAttempt({ account: "passthrough", attempt: 1, polyfillHeaders: false, polyfillBody: false, upstreamUrl: "https://api.anthropic.com/v1/messages?beta=true", }); tracer?.logUpstreamRequestHeaders(upstreamHeaders); tracer?.logUpstreamRequestBody(bodyStr); logProxyBody({ phase: "upstream_request", headers: upstreamHeaders, body: bodyStr, bodySize: Buffer.byteLength(bodyStr, "utf8"), contentType: upstreamHeaders["content-type"] ?? 
"application/json", account: "passthrough", accountType: "passthrough", attempt: 1, }); let response; try { response = await fetch("https://api.anthropic.com/v1/messages?beta=true", { method: "POST", headers: upstreamHeaders, body: bodyStr, signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS), }); } catch (fetchErr) { const errMsg = fetchErr instanceof Error ? fetchErr.message : String(fetchErr); tracer?.setError("network_error", errMsg); upstreamSpan?.end(); tracer?.end(502, Date.now() - requestStartTime); logRequest({ timestamp: new Date().toISOString(), requestId: ctx.requestId, method: ctx.method, path: ctx.path, model: body.model, stream: body.stream ?? false, toolCount, account: "passthrough", accountType: "passthrough", responseStatus: 502, responseTimeMs: Date.now() - requestStartTime, errorType: "network_error", errorMessage: errMsg, }); const errorBody = buildClaudeError(502, `Passthrough fetch failed: ${errMsg}`); const errorBodyText = JSON.stringify(errorBody); logProxyBody({ phase: "client_response", headers: { "content-type": "application/json" }, body: errorBodyText, bodySize: Buffer.byteLength(errorBodyText, "utf8"), contentType: "application/json", account: "passthrough", accountType: "passthrough", attempt: 1, responseStatus: 502, durationMs: Date.now() - requestStartTime, }); return errorBody; } const upstreamResponseHeaders = {}; response.headers.forEach((value, key) => { upstreamResponseHeaders[key] = value; }); tracer?.logUpstreamResponseHeaders(upstreamResponseHeaders); if (!response.ok) { const errorText = await response.text(); tracer?.logUpstreamResponseBody(errorText); logProxyBody({ phase: "upstream_response", headers: upstreamResponseHeaders, body: errorText, bodySize: Buffer.byteLength(errorText, "utf8"), contentType: upstreamResponseHeaders["content-type"] ?? 
"application/json", account: "passthrough", accountType: "passthrough", attempt: 1, responseStatus: response.status, durationMs: Date.now() - requestStartTime, }); logProxyBody({ phase: "client_response", headers: upstreamResponseHeaders, body: errorText, bodySize: Buffer.byteLength(errorText, "utf8"), contentType: upstreamResponseHeaders["content-type"] ?? "application/json", account: "passthrough", accountType: "passthrough", attempt: 1, responseStatus: response.status, durationMs: Date.now() - requestStartTime, }); upstreamSpan?.end(); tracer?.setError("api_error", errorText.slice(0, 500)); tracer?.end(response.status, Date.now() - requestStartTime); try { return JSON.parse(errorText); } catch { return buildClaudeError(response.status, errorText); } } if (body.stream && response.body) { return handleClaudePassthroughStreamResponse({ ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody, }); } return handleClaudePassthroughJsonResponse({ ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody, }); } async function handleClaudePassthroughStreamResponse(args) { const { ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody, } = args; const responseHeaders = { ...upstreamResponseHeaders }; const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture(); const responseBody = response.body; if (!responseBody) { throw new Error("Expected passthrough stream response body"); } let streamSource = responseBody; if (tracer) { try { const { stream: interceptor, telemetry } = createSSEInterceptor({ captureRawText: true, }); streamSource = streamSource.pipeThrough(interceptor); const capturedTracer = tracer; const capturedUpstreamSpan = upstreamSpan; const capturedResponse = response; const capturedRequestBytes = bodyStr.length; Promise.all([telemetry, 
clientCapture]) .then(([data, clientBody]) => { capturedTracer.setUsage({ inputTokens: data.usage.inputTokens, outputTokens: data.usage.outputTokens, cacheCreationTokens: data.usage.cacheCreationInputTokens, cacheReadTokens: data.usage.cacheReadInputTokens, }); capturedTracer.logStreamEvents(data.events); const rateLimit5h = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? ""); const rateLimit7d = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? ""); const usageUpdate = { inputTokens: data.usage.inputTokens, outputTokens: data.usage.outputTokens, cacheCreationTokens: data.usage.cacheCreationInputTokens, cacheReadTokens: data.usage.cacheReadInputTokens, }; if (!isNaN(rateLimit5h)) { usageUpdate.rateLimitAfter5h = rateLimit5h; } if (!isNaN(rateLimit7d)) { usageUpdate.rateLimitAfter7d = rateLimit7d; } if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) { capturedTracer.setUsage(usageUpdate); } capturedTracer.logUpstreamResponseBody(data.rawText ?? ""); capturedTracer.recordMetrics(); capturedTracer.recordBodySizes(capturedRequestBytes, data.totalBytesReceived); capturedUpstreamSpan?.end(); capturedTracer.end(200, Date.now() - requestStartTime); const traceCtx = capturedTracer.getTraceContext(); logRequest({ timestamp: new Date().toISOString(), requestId: ctx.requestId, method: ctx.method, path: ctx.path, model: body.model, stream: true, toolCount, account: "passthrough", accountType: "passthrough", responseStatus: 200, responseTimeMs: Date.now() - requestStartTime, inputTokens: data.usage.inputTokens, outputTokens: data.usage.outputTokens, cacheCreationTokens: data.usage.cacheCreationInputTokens, cacheReadTokens: data.usage.cacheReadInputTokens, traceId: traceCtx.traceId, spanId: traceCtx.spanId, }); logProxyBody({ phase: "upstream_response", headers: responseHeaders, body: data.rawText ?? "", bodySize: data.totalBytesReceived, contentType: responseHeaders["content-type"] ?? 
"text/event-stream", account: "passthrough", accountType: "passthrough", attempt: 1, responseStatus: 200, durationMs: Date.now() - requestStartTime, }); logProxyBody({ phase: "client_response", headers: responseHeaders, body: clientBody.text, bodySize: clientBody.totalBytes, contentType: responseHeaders["content-type"] ?? "text/event-stream", account: "passthrough", accountType: "passthrough", attempt: 1, responseStatus: 200, durationMs: Date.now() - requestStartTime, }); }) .catch((error) => { capturedTracer.setError("stream_error", error instanceof Error ? error.message : String(error)); capturedUpstreamSpan?.end(); capturedTracer.end(500, Date.now() - requestStartTime); const traceCtx = capturedTracer.getTraceContext(); logRequest({ timestamp: new Date().toISOString(), requestId: ctx.requestId, method: ctx.method, path: ctx.path, model: body.model, stream: true, toolCount, account: "passthrough", accountType: "passthrough", responseStatus: 500, responseTimeMs: Date.now() - requestStartTime, errorType: "stream_error", errorMessage: error instanceof Error ? error.message : String(error), traceId: traceCtx.traceId, spanId: traceCtx.spanId, }); }); } catch { // Streaming capture is best-effort. } } else { clientCapture .then((clientBody) => { logProxyBody({ phase: "upstream_response", headers: responseHeaders, body: clientBody.text, bodySize: clientBody.totalBytes, contentType: responseHeaders["content-type"] ?? "text/event-stream", account: "passthrough", accountType: "passthrough", attempt: 1, responseStatus: 200, durationMs: Date.now() - requestStartTime, }); logProxyBody({ phase: "client_response", headers: responseHeaders, body: clientBody.text, bodySize: clientBody.totalBytes, contentType: responseHeaders["content-type"] ?? 
"text/event-stream", account: "passthrough", accountType: "passthrough", attempt: 1, responseStatus: 200, durationMs: Date.now() - requestStartTime, }); }) .catch(() => { // Non-fatal }); } const clientStream = streamSource.pipeThrough(clientCaptureStream); return new Response(clientStream, { status: response.status, headers: responseHeaders, }); } async function handleClaudePassthroughJsonResponse(args) { const { ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody, } = args; const responseText = await response.text(); tracer?.logUpstreamResponseBody(responseText); logProxyBody({ phase: "upstream_response", headers: upstreamResponseHeaders, body: responseText, bodySize: Buffer.byteLength(responseText, "utf8"), contentType: upstreamResponseHeaders["content-type"] ?? "application/json", account: "passthrough", accountType: "passthrough", attempt: 1, responseStatus: response.status, durationMs: Date.now() - requestStartTime, }); logProxyBody({ phase: "client_response", headers: upstreamResponseHeaders, body: responseText, bodySize: Buffer.byteLength(responseText, "utf8"), contentType: upstreamResponseHeaders["content-type"] ?? "application/json", account: "passthrough", accountType: "passthrough", attempt: 1, responseStatus: response.status, durationMs: Date.now() - requestStartTime, }); const responseJson = JSON.parse(responseText); if (tracer && responseJson && typeof responseJson === "object") { const usage = responseJson.usage; if (usage) { tracer.setUsage({ inputTokens: usage.input_tokens ?? 0, outputTokens: usage.output_tokens ?? 0, cacheCreationTokens: usage.cache_creation_input_tokens ?? 0, cacheReadTokens: usage.cache_read_input_tokens ?? 0, }); const rateLimit5h = parseFloat(response.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? ""); const rateLimit7d = parseFloat(response.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? 
""); if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) { const usageWithRates = { inputTokens: usage.input_tokens ?? 0, outputTokens: usage.output_tokens ?? 0, cacheCreationTokens: usage.cache_creation_input_tokens ?? 0, cacheReadTokens: usage.cache_read_input_tokens ?? 0, }; if (!isNaN(rateLimit5h)) { usageWithRates.rateLimitAfter5h = rateLimit5h; } if (!isNaN(rateLimit7d)) { usageWithRates.rateLimitAfter7d = rateLimit7d; } tracer.setUsage(usageWithRates); } } tracer.recordMetrics(); const responseJsonStr = JSON.stringify(responseJson); tracer.recordBodySizes(bodyStr.length, responseJsonStr.length); upstreamSpan?.end(); tracer.end(response.status, Date.now() - requestStartTime); const traceCtx = tracer.getTraceContext(); logRequest({ timestamp: new Date().toISOString(), requestId: ctx.requestId, method: ctx.method, path: ctx.path, model: body.model, stream: false, toolCount, account: "passthrough", accountType: "passthrough", responseStatus: response.status, responseTimeMs: Date.now() - requestStartTime, inputTokens: usage?.input_tokens, outputTokens: usage?.output_tokens, cacheCreationTokens: usage?.cache_creation_input_tokens, cacheReadTokens: usage?.cache_read_input_tokens, traceId: traceCtx.traceId, spanId: traceCtx.spanId, }); } else { upstreamSpan?.end(); tracer?.end(response.status, Date.now() - requestStartTime); logRequest({ timestamp: new Date().toISOString(), requestId: ctx.requestId, method: ctx.method, path: ctx.path, model: body.model, stream: false, toolCount, account: "passthrough", accountType: "passthrough", responseStatus: response.status, responseTimeMs: Date.now() - requestStartTime, }); } return responseJson; } async function loadClaudeProxyAccounts(args) { const { ctx, body, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, } = args; const fs = await import("fs"); const os = await import("os"); const accounts = []; const legacyCredPath = `${os.homedir()}/.neurolink/anthropic-credentials.json`; const { tokenStore } = await 
import("../../auth/tokenStore.js"); if (!startupPruneDone) { await tokenStore.pruneExpired(); startupPruneDone = true; } const compoundKeys = await tokenStore.listByPrefix("anthropic:"); for (const key of compoundKeys) { if (await tokenStore.isDisabled(key)) { const existingState = getOrCreateRuntimeState(key); const tokens = await tokenStore.loadTokens(key); const hasTrackedTokens = existingState.lastToken !== undefined && existingState.lastToken !== ""; const tokenChanged = tokens && hasTrackedTokens && (existingState.lastToken !== tokens.accessToken || existingState.lastRefreshToken !== tokens.refreshToken); if (tokenChanged) { await tokenStore.markEnabled(key); logger.always(`[proxy] account=${key.split(":")[1] ?? key} re-enabled (credentials