@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
1,149 lines • 165 kB
JavaScript
/**
* Claude-Compatible Proxy Routes
*
* Exposes Anthropic-compatible /v1/messages, /v1/models, and /v1/messages/count_tokens
* endpoints. Translated requests are routed through ctx.neurolink.generate() /
* ctx.neurolink.stream(); passthrough requests are forwarded as-is to
* https://api.anthropic.com/v1/messages with a direct fetch().
*
* An optional ModelRouter can remap incoming model names to different
* provider/model pairs (e.g. "claude-sonnet-4-20250514" -> vertex/gemini-2.5-pro).
* Without a router, models are passed through to the Anthropic provider.
*/
import { access, readFile } from "node:fs/promises";
import { homedir } from "node:os";
import { join } from "node:path";
import { buildStableClaudeCodeBillingHeader, CLAUDE_CLI_USER_AGENT, CLAUDE_CODE_OAUTH_BETAS, getOrCreateClaudeCodeIdentity, parseClaudeCodeUserId, } from "../../auth/anthropicOAuth.js";
import { parseQuotaHeaders, saveAccountQuota, } from "../../proxy/accountQuota.js";
import { buildClaudeError, ClaudeStreamSerializer, generateToolUseId, parseClaudeRequest, serializeClaudeResponse, } from "../../proxy/claudeFormat.js";
import { tracers } from "../../telemetry/tracers.js";
import { withSpan } from "../../telemetry/withSpan.js";
import { ProxyTracer, recordFallbackAttempt } from "../../proxy/proxyTracer.js";
import { createRawStreamCapture } from "../../proxy/rawStreamCapture.js";
import { logBodyCapture, logRequest, logRequestAttempt, logStreamError, } from "../../proxy/requestLogger.js";
import { createSSEInterceptor } from "../../proxy/sseInterceptor.js";
import { needsRefresh, persistTokens, refreshToken, } from "../../proxy/tokenRefresh.js";
import { buildProxyTranslationPlan, parseRetryAfterMs, } from "../../proxy/routingPolicy.js";
import { writeJsonSnapshotAtomically } from "../../proxy/snapshotPersistence.js";
import { recordAttempt, recordAttemptError, recordFinalError, recordFinalSuccess, } from "../../proxy/usageStats.js";
import { logger } from "../../utils/logger.js";
import { ProviderHealthChecker } from "../../utils/providerHealth.js";
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
/**
 * Headers that must never be forwarded upstream to Anthropic.
 *
 * Entries are lower-case: every lookup in this file lower-cases the header
 * name before calling `.has()`. The same set is also consulted when header
 * snapshots are built and re-applied, so these headers are never captured
 * into or restored from a snapshot either.
 */
const BLOCKED_UPSTREAM_HEADERS = new Set([
"cookie",
"proxy-authorization",
"host",
"connection",
"content-length",
"transfer-encoding",
]);
// ---------------------------------------------------------------------------
// Module-level state
// ---------------------------------------------------------------------------
/** Fill-first: index of the current primary account. Only advances when
 * the current account exhausts 429 retries or auth retries fail. */
let primaryAccountIndex = 0;
/** Track account count so we can reset primaryAccountIndex when it changes. */
let lastKnownAccountCount = 0;
/** Stable account key (e.g. "anthropic:user@example.com") of the configured
 * home/primary account. Set once at proxy boot from routing.primaryAccount.
 * When undefined, home semantics fall back to enabledAccounts[0] (insertion
 * order) — preserves pre-existing behavior. */
let configuredPrimaryAccountKey;
/** NOTE(review): the retry limits below are not referenced in the visible part
 * of this file; their names suggest caps on auth retries, consecutive token
 * refresh failures and same-account retries of transient errors — confirm
 * against the retry loop that consumes them. */
const MAX_AUTH_RETRIES = 5;
const MAX_CONSECUTIVE_REFRESH_FAILURES = 15;
const MAX_TRANSIENT_SAME_ACCOUNT_RETRIES = 2;
/** Two delays (ms); presumably one per transient same-account retry, matching
 * MAX_TRANSIENT_SAME_ACCOUNT_RETRIES — TODO confirm. */
const TRANSIENT_SAME_ACCOUNT_RETRY_DELAYS_MS = [250, 1_000];
/** Maximum upstream 429 attempts per account before rotating to the next account.
 * Total attempts per account = this + 1 (the initial call plus this many retries). */
const MAX_RATE_LIMIT_SAME_ACCOUNT_RETRIES = 10;
/** Max time to sleep between 429 retries. Caps large upstream retry-after values
 * so we don't hold the client connection open for minutes. */
const MAX_RATE_LIMIT_RETRY_DELAY_MS = 30_000;
/** Timeout for upstream requests to Anthropic. Must be generous enough
 * to cover the full lifecycle of streaming responses, including extended
 * thinking from Opus models (which can exceed 5 minutes for large contexts). */
const UPSTREAM_FETCH_TIMEOUT_MS = 15 * 60 * 1000; // 15 minutes
/** Per-account runtime state, keyed by account key. The helpers in this file
 * read and clear a `coolingUntil` epoch-millisecond timestamp on each entry;
 * entries may carry further fields that are not visible here. */
const accountRuntimeState = new Map();
/** Track whether we've run the one-time startup prune. */
let startupPruneDone = false;
/** Default cooling period when retries are exhausted and upstream didn't
 * provide a retry-after header. Short enough to recover quickly, long
 * enough to avoid immediately hammering the same account. */
const DEFAULT_COOLING_PERIOD_MS = 60_000;
/**
 * Move the fill-first primary pointer to the next enabled account, wrapping
 * around at the end of the list.
 *
 * The pointer only moves when there is more than one enabled account AND the
 * exhausted account is the current primary; an exhausted fallback account
 * leaves the pointer where it is.
 *
 * @param {string} accountKey - Key of the account that was just exhausted.
 * @param {number} enabledCount - Number of currently enabled accounts.
 * @param {string} primaryAccountKey - Key of the current primary account.
 * @returns {void}
 */
function advancePrimaryIfCurrent(accountKey, enabledCount, primaryAccountKey) {
  // Nothing to rotate to, or the exhausted account was already a fallback.
  if (enabledCount <= 1 || accountKey !== primaryAccountKey) {
    return;
  }
  const nextIndex = primaryAccountIndex + 1;
  primaryAccountIndex = nextIndex % enabledCount;
}
/**
 * Find where the configured home/primary account sits in this request's
 * `enabledAccounts` list.
 *
 * Resolution happens per request because the membership of `enabledAccounts`
 * can change between requests.
 *
 * @param {Array<{key: string}>} enabledAccounts - Accounts enabled for this request.
 * @returns {number} Index of the configured primary, or 0 (insertion-order
 *   fallback) when no key is configured or no enabled account matches it.
 */
function resolveHomeIndex(enabledAccounts) {
  const homeKey = configuredPrimaryAccountKey;
  if (!homeKey) {
    return 0;
  }
  const position = enabledAccounts.findIndex((account) => account.key === homeKey);
  // findIndex yields -1 for "not found"; clamp that to the fallback index 0.
  return Math.max(position, 0);
}
/**
 * Return traffic to the configured home account once it has stopped cooling.
 *
 * Called at the start of each request. The home index is resolved fresh on
 * every call. When the home account has no runtime state, no `coolingUntil`,
 * or a `coolingUntil` that has already passed, `primaryAccountIndex` is reset
 * to the home index; an expired `coolingUntil` is cleared and logged.
 *
 * @param {Array<{key: string, label: string}>} enabledAccounts - Accounts enabled for this request.
 * @returns {void}
 */
function maybeResetPrimaryToHome(enabledAccounts) {
  if (enabledAccounts.length <= 1) {
    return;
  }
  const homeIndex = resolveHomeIndex(enabledAccounts);
  if (primaryAccountIndex === homeIndex) {
    return;
  }
  const homeAccount = enabledAccounts[homeIndex];
  const homeState = accountRuntimeState.get(homeAccount.key);
  const coolingUntil = homeState?.coolingUntil;
  // Still inside the cooling window: keep serving from the current fallback.
  if (coolingUntil && !(Date.now() >= coolingUntil)) {
    return;
  }
  primaryAccountIndex = homeIndex;
  if (coolingUntil) {
    // The window has expired; drop the stale timestamp and announce the switch.
    homeState.coolingUntil = undefined;
    logger.always(`[proxy] home primary account=${homeAccount.label} cooling expired, resetting primaryAccountIndex to ${homeIndex}`);
  }
}
/**
 * Report whether an account is currently inside its cooling window.
 *
 * @param {string} accountKey - Key used in `accountRuntimeState`.
 * @returns {boolean} `true` only when the account has a `coolingUntil`
 *   timestamp that lies in the future.
 */
function isAccountCooling(accountKey) {
  const coolingUntil = accountRuntimeState.get(accountKey)?.coolingUntil;
  if (!coolingUntil) {
    return false;
  }
  return Date.now() < coolingUntil;
}
// ---------------------------------------------------------------------------
// OAuth polyfill helpers (extracted to reduce block nesting)
// ---------------------------------------------------------------------------
/** In-memory cache of loaded/persisted Claude snapshots. Keyed by the
 * filesystem-safe account label; each value is `{ snapshot, loadedAt }`
 * where `loadedAt` is an epoch-millisecond timestamp. */
const snapshotCache = new Map();
/** How long a cached snapshot is served before it is re-read from disk. */
const SNAPSHOT_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
/** Allow-list of (lower-case) request header names that may be captured into
 * a header snapshot. Headers outside this list are never persisted. */
const SNAPSHOT_STABLE_HEADERS = new Set([
"accept",
"accept-encoding",
"accept-language",
"anthropic-beta",
"anthropic-dangerous-direct-browser-access",
"anthropic-version",
"sec-fetch-mode",
"user-agent",
"x-app",
"x-stainless-arch",
"x-stainless-lang",
"x-stainless-os",
"x-stainless-package-version",
"x-stainless-retry-count",
"x-stainless-runtime",
"x-stainless-runtime-version",
"x-stainless-timeout",
"x-subscription-tier",
]);
/** NOTE(review): not referenced in the visible part of this file; presumably
 * the `anthropic-beta` values sent for OAuth requests from non-Claude-Code
 * clients — confirm at the usage site. */
const NON_CLAUDE_OAUTH_BETAS = [
"oauth-2025-04-20",
"claude-code-20250219",
"fine-grained-tool-streaming-2025-05-14",
];
/**
 * Turn an account label into a string that is safe to embed in a file name
 * and to use as a `snapshotCache` key.
 *
 * @param {string} accountLabel - Human-readable account label.
 * @returns {string} The label with every character other than ASCII letters,
 *   digits, `.`, `_`, `@` and `-` replaced by `_`.
 */
function getSnapshotSafeLabel(accountLabel) {
  const unsafeCharacters = /[^a-zA-Z0-9._@-]/g;
  const placeholder = "_";
  return accountLabel.replace(unsafeCharacters, placeholder);
}
/**
 * Build the on-disk location of an account's header snapshot:
 * `<home>/.neurolink/header-snapshots/anthropic_<safe-label>.json`.
 *
 * @param {string} accountLabel - Human-readable account label.
 * @returns {string} Absolute path of the snapshot file.
 */
function getSnapshotPath(accountLabel) {
  const snapshotDir = join(homedir(), ".neurolink", "header-snapshots");
  const fileName = `anthropic_${getSnapshotSafeLabel(accountLabel)}.json`;
  return join(snapshotDir, fileName);
}
/**
 * Fill gaps in `headers` from a previously captured snapshot (mutates
 * `headers` in place).
 *
 * A snapshot header is copied, under its lower-cased name, only when its
 * value is a string, `headers` has no truthy value under that name, and the
 * name is neither blocked for upstream forwarding nor one of the credential /
 * session headers (`authorization`, `x-api-key`, `x-claude-code-session-id`).
 *
 * @param {Record<string, string>} headers - Outgoing headers to complete.
 * @param {{headers?: Record<string, unknown>} | null | undefined} snapshot - Captured snapshot, if any.
 * @returns {void}
 */
function applySnapshotHeaders(headers, snapshot) {
  const captured = snapshot?.headers;
  if (!captured) {
    return;
  }
  const isCredentialOrSession = (name) =>
    name === "authorization" ||
    name === "x-api-key" ||
    name === "x-claude-code-session-id";
  for (const [rawName, rawValue] of Object.entries(captured)) {
    if (typeof rawValue !== "string") {
      continue;
    }
    const name = rawName.toLowerCase();
    // Values already present on the request always win over the snapshot.
    if (headers[name]) {
      continue;
    }
    if (BLOCKED_UPSTREAM_HEADERS.has(name) || isCredentialOrSession(name)) {
      continue;
    }
    headers[name] = rawValue;
  }
}
/**
 * Load the persisted Claude header/body snapshot for an account.
 *
 * A cached copy younger than `SNAPSHOT_CACHE_TTL_MS` is returned as-is.
 * Otherwise the snapshot file is read, normalised, cached and returned.
 * Loading is best-effort: a missing, unreadable or malformed file yields
 * `null` rather than an error.
 *
 * @param {string} accountLabel - Human-readable account label.
 * @returns {Promise<object | null>} Normalised snapshot
 *   (`accountKey`, `capturedAt`, `source`, `headers`, optional `body`), or
 *   `null` when no usable snapshot exists.
 */
async function loadClaudeSnapshot(accountLabel) {
  // Only plain objects are usable as the snapshot itself, its headers or its
  // body. A string or array from a corrupt file would otherwise be iterated
  // entry-by-entry when the snapshot is applied to a request.
  const isRecord = (value) =>
    value !== null && typeof value === "object" && !Array.isArray(value);
  try {
    const safeLabel = getSnapshotSafeLabel(accountLabel);
    const cached = snapshotCache.get(safeLabel);
    if (cached && Date.now() - cached.loadedAt < SNAPSHOT_CACHE_TTL_MS) {
      return cached.snapshot;
    }
    // Read directly instead of probing with access() first: the probe is a
    // second syscall and races with the read. A missing file surfaces as
    // ENOENT and is handled in the catch below.
    const snapshot = JSON.parse(await readFile(getSnapshotPath(accountLabel), "utf8"));
    if (!isRecord(snapshot)) {
      return null;
    }
    const normalized = {
      accountKey: typeof snapshot.accountKey === "string"
        ? snapshot.accountKey
        : `anthropic:${accountLabel}`,
      capturedAt: typeof snapshot.capturedAt === "string"
        ? snapshot.capturedAt
        : new Date(0).toISOString(),
      source: "claude-code",
      headers: isRecord(snapshot.headers) ? snapshot.headers : {},
      ...(isRecord(snapshot.body) ? { body: snapshot.body } : {}),
    };
    // A snapshot that carries neither headers nor body has nothing to offer.
    if (Object.keys(normalized.headers).length === 0 &&
      Object.keys(normalized.body ?? {}).length === 0) {
      return null;
    }
    snapshotCache.set(safeLabel, {
      snapshot: normalized,
      loadedAt: Date.now(),
    });
    return normalized;
  }
  catch (error) {
    // "No snapshot yet" is the normal case; anything else is worth a trace.
    if (error?.code !== "ENOENT") {
      logger.debug(`[proxy] ignoring unreadable Claude snapshot for ${accountLabel}: ${error instanceof Error ? error.message : String(error)}`);
    }
    return null;
  }
}
/**
 * Produce the header set to store in a snapshot: the previously stored
 * headers overlaid with the capturable headers of the current request.
 *
 * A request header is captured (under its lower-cased name) only when its
 * value is a string, its name is on the `SNAPSHOT_STABLE_HEADERS` allow-list,
 * and it is neither blocked for upstream forwarding nor a credential /
 * session header. Neither input object is mutated.
 *
 * @param {Record<string, unknown>} headers - Headers of the current request.
 * @param {Record<string, string> | undefined} existingHeaders - Headers already stored.
 * @returns {Record<string, string>} New merged header object.
 */
function buildSnapshotHeaders(headers, existingHeaders) {
  const result = { ...(existingHeaders ?? {}) };
  const neverCaptured = (name) =>
    BLOCKED_UPSTREAM_HEADERS.has(name) ||
    name === "authorization" ||
    name === "x-api-key" ||
    name === "x-claude-code-session-id";
  for (const [rawName, rawValue] of Object.entries(headers)) {
    if (typeof rawValue !== "string") {
      continue;
    }
    const name = rawName.toLowerCase();
    if (!SNAPSHOT_STABLE_HEADERS.has(name) || neverCaptured(name)) {
      continue;
    }
    result[name] = rawValue;
  }
  return result;
}
/**
 * Pull the Claude-Code-specific pieces out of a parsed request body.
 *
 * Looks at `metadata.user_id` (parsed via `parseClaudeCodeUserId`) and at the
 * `system` prompt, which may be a string or an array of blocks, for a block
 * mentioning `x-anthropic-billing-header` and one mentioning
 * `Claude Agent SDK`.
 *
 * @param {unknown} body - Parsed request body.
 * @returns {{metadataUserId?: string, sessionId?: string, billingHeader?: string, agentBlock?: string} | undefined}
 *   The pieces that were found, or `undefined` when `body` is not an object
 *   or none of them is present.
 */
function extractSnapshotBody(body) {
  if (!body || typeof body !== "object") {
    return undefined;
  }
  const identity = parseClaudeCodeUserId(body.metadata?.user_id);
  // Normalise `system` to an array of blocks.
  let systemBlocks = [];
  if (Array.isArray(body.system)) {
    systemBlocks = body.system;
  }
  else if (typeof body.system === "string") {
    systemBlocks = [{ type: "text", text: body.system }];
  }
  const textContaining = (marker) => systemBlocks.find((block) => typeof block?.text === "string" &&
    block.text.includes(marker))?.text;
  const billingHeader = textContaining("x-anthropic-billing-header");
  const agentBlock = textContaining("Claude Agent SDK");
  if (!identity && !billingHeader && !agentBlock) {
    return undefined;
  }
  const extracted = {};
  if (identity) {
    extracted.metadataUserId = identity.metadataUserId;
    extracted.sessionId = identity.sessionId;
  }
  if (billingHeader) {
    extracted.billingHeader = billingHeader;
  }
  if (agentBlock) {
    extracted.agentBlock = agentBlock;
  }
  return extracted;
}
/**
 * Heuristic: does this request look like it came from Claude Code?
 *
 * True when the request carries an `x-claude-code-session-id` header, a
 * `user-agent` starting with `claude-cli/`, or a body from which a metadata
 * user id, billing header or agent block was extracted.
 *
 * @param {Record<string, string>} headers - Request headers (lower-case names).
 * @param {{metadataUserId?: string, billingHeader?: string, agentBlock?: string} | undefined} snapshotBody
 *   Result of `extractSnapshotBody` for the request.
 * @returns {boolean}
 */
function isLikelyClaudeClient(headers, snapshotBody) {
  if (typeof headers["x-claude-code-session-id"] === "string") {
    return true;
  }
  if (headers["user-agent"]?.startsWith("claude-cli/")) {
    return true;
  }
  const bodyMarker = snapshotBody?.metadataUserId ||
    snapshotBody?.billingHeader ||
    snapshotBody?.agentBlock;
  return Boolean(bodyMarker);
}
/**
 * Decide whether a freshly built snapshot carries the same headers and body
 * as the stored one. Comparison is by JSON serialisation, so it is sensitive
 * to key order; `capturedAt` and `accountKey` are not compared.
 *
 * @param {{headers?: object, body?: object} | null | undefined} existing - Stored snapshot.
 * @param {{headers?: object, body?: object}} next - Candidate snapshot.
 * @returns {boolean} `false` when there is no stored snapshot or either part differs.
 */
function snapshotsMatch(existing, next) {
  if (!existing) {
    return false;
  }
  const serialize = (part) => JSON.stringify(part ?? {});
  const sameHeaders = serialize(existing.headers) === serialize(next.headers);
  return sameHeaders && serialize(existing.body) === serialize(next.body);
}
/**
 * Write a snapshot to the account's snapshot file (mode 0600) and, once the
 * write has succeeded, refresh the in-memory cache entry for that account.
 *
 * @param {string} accountLabel - Human-readable account label.
 * @param {object} snapshot - Snapshot to persist.
 * @returns {Promise<void>} Rejects with the write error; the cache is left
 *   untouched in that case.
 */
async function persistClaudeSnapshot(accountLabel, snapshot) {
  await writeJsonSnapshotAtomically(getSnapshotPath(accountLabel), snapshot, 0o600);
  const cacheKey = getSnapshotSafeLabel(accountLabel);
  snapshotCache.set(cacheKey, { snapshot, loadedAt: Date.now() });
}
/**
 * Update the stored Claude snapshot for an account from the current request,
 * when that request looks like it came from Claude Code.
 *
 * The stored snapshot is returned unchanged when the body is not valid JSON,
 * when the request does not look like a Claude client, or when the rebuilt
 * snapshot has the same headers and body as the stored one. Otherwise the new
 * snapshot is persisted; if persisting fails, the failure is logged and the
 * snapshot is kept in the in-memory cache only.
 *
 * @param {string} accountLabel - Human-readable account label.
 * @param {string} accountKey - Stable account key stored in the snapshot.
 * @param {Record<string, string>} headers - Request headers (lower-case names).
 * @param {string} bodyStr - Raw JSON request body.
 * @returns {Promise<object | null>} The snapshot to use for this request.
 */
async function maybeRefreshClaudeSnapshot(accountLabel, accountKey, headers, bodyStr) {
  const existing = await loadClaudeSnapshot(accountLabel);
  let requestBody;
  try {
    requestBody = JSON.parse(bodyStr);
  }
  catch {
    return existing;
  }
  const extractedBody = extractSnapshotBody(requestBody);
  if (!isLikelyClaudeClient(headers, extractedBody)) {
    return existing;
  }
  const capturedAt = new Date().toISOString();
  const mergedHeaders = buildSnapshotHeaders(headers, existing?.headers);
  const mergedBody = { ...(existing?.body ?? {}), ...(extractedBody ?? {}) };
  // An explicit session header takes precedence over any session id found in
  // the stored snapshot or the request body.
  const sessionHeader = headers["x-claude-code-session-id"];
  if (typeof sessionHeader === "string") {
    mergedBody.sessionId = sessionHeader;
  }
  const next = {
    accountKey,
    capturedAt,
    source: "claude-code",
    headers: mergedHeaders,
    body: mergedBody,
  };
  if (snapshotsMatch(existing, next)) {
    return existing;
  }
  try {
    await persistClaudeSnapshot(accountLabel, next);
  }
  catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn("[proxy] failed to persist Claude snapshot", {
      accountLabel,
      error: reason,
    });
    // Disk write failed: still serve the fresh snapshot from memory.
    snapshotCache.set(getSnapshotSafeLabel(accountLabel), {
      snapshot: next,
      loadedAt: Date.now(),
    });
  }
  return next;
}
/**
 * Polyfill the request body for OAuth accounts.
 * Claude Code injects a billing header, agent block, and metadata.user_id
 * into the body. Non-CC clients (Curator, custom apps) don't send these —
 * Anthropic rejects without them.
 *
 * @param {string} bodyStr - Raw JSON request body.
 * @param {string} accountToken - OAuth token; its first 20 characters seed the identity.
 * @param {{body?: {agentBlock?: string, billingHeader?: string, metadataUserId?: string, sessionId?: string}} | null | undefined} snapshot
 *   Captured Claude Code snapshot used as a source of defaults.
 * @param {string} [preferredSessionId] - Session id to prefer over the snapshot's.
 * @returns {{bodyStr: string, sessionId?: string}} The rewritten body and the
 *   session id of the identity used; when rewriting fails (e.g. the body is
 *   not valid JSON) the original `bodyStr` is returned without a `sessionId`.
 */
function polyfillOAuthBody(bodyStr, accountToken, snapshot, preferredSessionId) {
  try {
    const parsed = JSON.parse(bodyStr);
    // NOTE: The billing block MUST be deterministic (no random values) to
    // preserve Anthropic's prompt caching prefix chain. We keep the real
    // Claude Code version/entrypoint shape when present, but stabilize the
    // volatile cch.
    const agentBlock = {
      type: "text",
      text: snapshot?.body?.agentBlock ||
        "You are a Claude agent, built on Anthropic's Claude Agent SDK.",
    };
    // Normalise system to array and APPEND billing + agent blocks.
    // IMPORTANT: We append (not prepend) to preserve the client's cache
    // prefix chain. Anthropic's prompt caching uses prefix matching — if we
    // insert anything before the client's system blocks, we invalidate all
    // cached content (tools, system prompt, message history).
    //
    // Claude Code sends a billing block with a `cch=<hash>` value that changes
    // on every request. We fix this by:
    //   1. Removing the client's billing block from its current position
    //   2. Stabilizing it while keeping the official Claude Code shape
    //   3. Appending it at the END so the cacheable system blocks stay
    //      at the front of the prefix chain
    if (parsed.system) {
      if (typeof parsed.system === "string") {
        parsed.system = [{ type: "text", text: parsed.system }];
      }
      if (Array.isArray(parsed.system)) {
        // Null-safe on purpose: a malformed (null) entry must not abort the
        // whole polyfill.
        const mentions = (marker) => (block) => typeof block?.text === "string" && block.text.includes(marker);
        // Locate existing billing/agent blocks wherever the client placed
        // them (typically at system[0]).
        const billingIdx = parsed.system.findIndex(mentions("x-anthropic-billing-header"));
        const agentIdx = parsed.system.findIndex(mentions("Claude Agent SDK"));
        const billingBlock = {
          type: "text",
          text: buildStableClaudeCodeBillingHeader(parsed.system[billingIdx]?.text ?? snapshot?.body?.billingHeader),
        };
        // One block can carry both markers, in which case both indices are
        // equal. Collect them in a Set so that block is dropped exactly once
        // and no unrelated neighbour is removed with it.
        const staleIndices = new Set([billingIdx, agentIdx].filter((i) => i >= 0));
        const clientBlocks = parsed.system.filter((_, i) => !staleIndices.has(i));
        // Always append a deterministic billing block at the end.
        // If the client sent one, we stripped its dynamic cch= and use
        // our stable version instead. If not, we add ours.
        parsed.system = [...clientBlocks, billingBlock, agentBlock];
      }
    }
    else {
      const billingBlock = {
        type: "text",
        text: buildStableClaudeCodeBillingHeader(snapshot?.body?.billingHeader),
      };
      parsed.system = [billingBlock, agentBlock];
    }
    // Inject Claude-Code-shaped metadata.user_id (required for OAuth).
    const tokenPrefix = accountToken.slice(0, 20);
    const identity = getOrCreateClaudeCodeIdentity(tokenPrefix, {
      existingUserId: parsed.metadata?.user_id ?? snapshot?.body?.metadataUserId,
      preferredSessionId: preferredSessionId ?? snapshot?.body?.sessionId,
    });
    parsed.metadata = {
      ...parsed.metadata,
      user_id: identity.metadataUserId,
    };
    return { bodyStr: JSON.stringify(parsed), sessionId: identity.sessionId };
  }
  catch {
    return { bodyStr }; // JSON parse failed — use original body
  }
}
// ---------------------------------------------------------------------------
// Legacy credential refresh helper (extracted to reduce block nesting)
// ---------------------------------------------------------------------------
/**
 * Build the "legacy default" OAuth account from a legacy credentials object,
 * refreshing its token first when it has expired.
 *
 * @param {{oauth?: {accessToken?: string, refreshToken?: string, expiresAt?: number}}} creds
 *   Parsed legacy credentials.
 * @param {string} legacyCredPath - File the credentials came from; refreshed
 *   tokens are persisted back to it.
 * @returns {Promise<object | undefined>} The account descriptor, or
 *   `undefined` when there is no access token, or the token has expired and
 *   cannot be refreshed (no refresh token, or the refresh failed).
 */
async function tryLoadLegacyAccount(creds, legacyCredPath) {
  const oauth = creds.oauth;
  if (!oauth?.accessToken) {
    return undefined;
  }
  // Single place that knows the shape of the legacy account descriptor.
  const toAccount = (token, refresh, expiresAt) => ({
    key: "anthropic:legacy-default",
    label: "default",
    token,
    refreshToken: refresh,
    expiresAt,
    type: "oauth",
    persistTarget: { credPath: legacyCredPath },
  });
  const { accessToken, refreshToken: storedRefresh, expiresAt: storedExpiry } = oauth;
  // A missing expiry is treated as "not expired".
  const isExpired = Boolean(storedExpiry) && storedExpiry < Date.now();
  if (!isExpired) {
    return toAccount(accessToken, storedRefresh, storedExpiry);
  }
  if (!storedRefresh) {
    logger.always("[proxy] skipping legacy account (expired, no refresh token)");
    return undefined;
  }
  // refreshToken() updates this object in place on success.
  const pending = {
    token: accessToken,
    refreshToken: storedRefresh,
    expiresAt: storedExpiry,
    label: "default",
  };
  const outcome = await refreshToken(pending);
  if (!outcome.success) {
    logger.always(`[proxy] skipping legacy account (expired, refresh failed: ${outcome.error?.slice(0, 200) ?? "unknown"})`);
    return undefined;
  }
  const refreshed = toAccount(pending.token, pending.refreshToken, pending.expiresAt);
  await persistTokens(legacyCredPath, pending);
  logger.always("[proxy] refreshed legacy account at startup");
  return refreshed;
}
/**
 * Entry point for "translated" requests: parse the Claude-format body, build
 * the ordered list of provider attempts, log that routing decision, and hand
 * off to the streaming or the JSON handler depending on `body.stream`.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context (provides `neurolink`).
 * @param {object} args.body - Parsed Claude-format request body.
 * @param {{provider: string, model: string}} args.route - Primary routing target.
 * @param {object} [args.modelRouter] - Optional router supplying a fallback chain.
 * @param {object} [args.tracer] - Optional request tracer.
 * @param {number} args.requestStartTime - Epoch ms at which the request started.
 * @param {Function} args.logProxyBody - Body/decision logger.
 * @returns {Promise<Response | object>} Whatever the selected handler returns.
 */
async function handleTranslatedClaudeRequest(args) {
  const { ctx, body, route, modelRouter, tracer, requestStartTime, logProxyBody } = args;
  tracer?.setMode("full");
  const parsed = parseClaudeRequest(body);
  const primaryTarget = { provider: route.provider, model: route.model };
  const fallbackChain = modelRouter?.getFallbackChain() ?? [];
  const plan = buildProxyTranslationPlan(primaryTarget, fallbackChain, body.model, parsed);
  logProxyRoutingPlan(logProxyBody, "translated_request", plan);
  const handlerArgs = {
    ctx,
    body,
    attempts: plan.attempts,
    parsed,
    tracer,
    requestStartTime,
  };
  if (!body.stream) {
    return handleTranslatedClaudeJsonRequest({ ...handlerArgs, logProxyBody });
  }
  return handleTranslatedClaudeStreamRequest(handlerArgs);
}
/**
 * Record a routing decision through the proxy body logger.
 *
 * @param {Function} logProxyBody - Logger invoked with a single entry object.
 * @param {string} stage - Name of the stage that produced the plan.
 * @param {{attempts: unknown[]}} plan - Routing plan; only `attempts` is logged.
 * @returns {void}
 */
function logProxyRoutingPlan(logProxyBody, stage, plan) {
  const decision = { stage, attempts: plan.attempts };
  const entry = {
    phase: "routing_decision",
    contentType: "application/json",
    body: decision,
  };
  logProxyBody(entry);
}
/**
 * Serve a translated request as a Claude-format SSE stream.
 *
 * Attempts are tried in order. An attempt that fails, or finishes without
 * text or tool calls, before any text was sent falls through to the next
 * attempt; a failure after text was already sent ends the stream with an SSE
 * `error` event. A `: keep-alive` comment is emitted every 15 s while the
 * stream is open.
 *
 * Once the client cancels the stream, no further attempt is started, and an
 * upstream iterator that only became available after the cancellation is
 * closed immediately.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `ctx.neurolink.stream()` is called per attempt.
 * @param {object} args.body - Parsed Claude-format request body.
 * @param {Array<{label: string, provider?: string, model?: string}>} args.attempts - Ordered attempts.
 * @param {object} args.parsed - Result of `parseClaudeRequest(body)`.
 * @param {object} [args.tracer] - Optional tracer. It is always ended with
 *   status 200; failures are reported through `setError`.
 * @param {number} args.requestStartTime - Epoch ms at which the request started.
 * @returns {Promise<Response>} `text/event-stream` response.
 */
async function handleTranslatedClaudeStreamRequest(args) {
  const { ctx, body, attempts, parsed, tracer, requestStartTime } = args;
  const serializer = new ClaudeStreamSerializer(body.model, 0);
  const KEEPALIVE_INTERVAL_MS = 15_000;
  const encoder = new TextEncoder();
  let translationKeepAliveTimer;
  let translationCancelled = false;
  let translationSucceeded = false;
  let translatedModel;
  let finalStreamError = "No translation providers succeeded";
  let upstreamIterator;
  const translationStream = new ReadableStream({
    async start(controller) {
      for (const frame of serializer.start()) {
        controller.enqueue(encoder.encode(frame));
      }
      translationKeepAliveTimer = setInterval(() => {
        try {
          controller.enqueue(encoder.encode(": keep-alive\n\n"));
        }
        catch {
          // Controller already closed.
        }
      }, KEEPALIVE_INTERVAL_MS);
      try {
        for (let attemptIndex = 0; attemptIndex < attempts.length; attemptIndex++) {
          // The client is gone: do not spend another provider call on it.
          if (translationCancelled) {
            break;
          }
          const attempt = attempts[attemptIndex];
          if (attemptIndex > 0) {
            logger.always(`[proxy] fallback → ${attempt.label}`);
          }
          let collectedText = "";
          try {
            const options = buildProxyFallbackOptions(parsed, attempt.provider
              ? {
                provider: attempt.provider,
                model: attempt.model,
              }
              : {});
            const streamResult = await ctx.neurolink.stream(options);
            const iterable = streamResult.stream;
            upstreamIterator = iterable[Symbol.asyncIterator]();
            if (translationCancelled) {
              // cancel() ran while stream() was still pending, so it could
              // not see this iterator. Close it here to release the upstream.
              await upstreamIterator.return?.(undefined);
              return;
            }
            while (true) {
              if (translationCancelled) {
                break;
              }
              const { value: chunk, done } = await upstreamIterator.next();
              if (done || translationCancelled) {
                break;
              }
              const text = extractText(chunk);
              if (text) {
                collectedText += text;
                for (const frame of serializer.pushDelta(text)) {
                  controller.enqueue(encoder.encode(frame));
                }
              }
            }
            const toolCalls = streamResult.toolCalls ?? [];
            if (!hasTranslatedOutput(collectedText, toolCalls)) {
              finalStreamError = `Translated provider ${attempt.label} returned no content or tool calls`;
              logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`);
              continue;
            }
            if (!translationCancelled && toolCalls.length) {
              for (const toolCall of toolCalls) {
                const toolName = toolCall.toolName ??
                  toolCall.name ??
                  "unknown";
                for (const frame of serializer.pushToolUse(generateToolUseId(), toolName, extractToolArgs(toolCall))) {
                  controller.enqueue(encoder.encode(frame));
                }
              }
            }
            if (!translationCancelled) {
              const reason = streamResult.finishReason ?? "end_turn";
              const resolvedUsage = extractUsageFromStreamResult(streamResult.usage);
              for (const frame of serializer.finish(resolvedUsage.output, reason)) {
                controller.enqueue(encoder.encode(frame));
              }
            }
            translatedModel = streamResult.model;
            translationSucceeded = true;
            return;
          }
          catch (streamErr) {
            if (translationCancelled) {
              return;
            }
            finalStreamError =
              streamErr instanceof Error
                ? streamErr.message
                : String(streamErr);
            // Text already reached the client, so a fallback provider can no
            // longer take over: report the interruption and stop.
            if (collectedText.trim().length > 0) {
              logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`);
              const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`;
              controller.enqueue(encoder.encode(errorEvent));
              return;
            }
            logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${finalStreamError}`);
          }
        }
        // Every attempt failed or came back empty.
        if (!translationCancelled) {
          logger.always(`[proxy] mid-stream error (translation mode): ${finalStreamError}`);
          const errorEvent = `event: error\ndata: ${JSON.stringify({ type: "error", error: { type: "api_error", message: `Upstream stream interrupted: ${finalStreamError}` } })}\n\n`;
          controller.enqueue(encoder.encode(errorEvent));
        }
      }
      finally {
        if (translationKeepAliveTimer) {
          clearInterval(translationKeepAliveTimer);
        }
        // After a cancel the stream is already torn down; closing it again
        // would throw.
        if (!translationCancelled) {
          controller.close();
        }
        if (tracer && translatedModel && translatedModel !== body.model) {
          tracer.setModelSubstitution(body.model, translatedModel);
        }
        if (!translationSucceeded) {
          tracer?.setError("generation_error", finalStreamError.slice(0, 500));
        }
        tracer?.end(200, Date.now() - requestStartTime);
      }
    },
    cancel() {
      translationCancelled = true;
      if (translationKeepAliveTimer) {
        clearInterval(translationKeepAliveTimer);
        translationKeepAliveTimer = undefined;
      }
      if (upstreamIterator?.return) {
        upstreamIterator.return(undefined).catch((cancelErr) => {
          logger.debug(`[proxy] upstream cancel error: ${cancelErr instanceof Error ? cancelErr.message : String(cancelErr)}`);
        });
      }
    },
  });
  return new Response(translationStream, {
    headers: {
      "content-type": "text/event-stream",
      "cache-control": "no-cache",
      connection: "keep-alive",
    },
  });
}
/**
 * Serve a translated, non-streaming request.
 *
 * Attempts are tried in order. Each attempt streams from the provider,
 * buffers the text, and is accepted as soon as it yields text or tool calls.
 * The accepted result is serialised to Claude format, logged as the client
 * response, and returned.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `ctx.neurolink.stream()` is called per attempt.
 * @param {object} args.body - Parsed Claude-format request body.
 * @param {Array<{label: string, provider?: string, model?: string}>} args.attempts - Ordered attempts.
 * @param {object} args.parsed - Result of `parseClaudeRequest(body)`.
 * @param {object} [args.tracer] - Optional tracer; ended with 200 on success only.
 * @param {number} args.requestStartTime - Epoch ms at which the request started.
 * @param {Function} args.logProxyBody - Body logger.
 * @returns {Promise<object>} Claude-format response object.
 * @throws {Error} When no attempt produced output. The message describes the
 *   last failure; when that failure was a thrown error it is attached as
 *   `cause` so its stack is not lost.
 */
async function handleTranslatedClaudeJsonRequest(args) {
  const { ctx, body, attempts, parsed, tracer, requestStartTime, logProxyBody, } = args;
  let lastAttemptError = "No translation providers succeeded";
  // The thrown value behind lastAttemptError, if there was one.
  let lastAttemptCause;
  for (let attemptIndex = 0; attemptIndex < attempts.length; attemptIndex++) {
    const attempt = attempts[attemptIndex];
    if (attemptIndex > 0) {
      logger.always(`[proxy] fallback → ${attempt.label}`);
    }
    try {
      const options = buildProxyFallbackOptions(parsed, attempt.provider
        ? {
          provider: attempt.provider,
          model: attempt.model,
        }
        : {});
      const streamResult = await ctx.neurolink.stream(options);
      let collectedText = "";
      for await (const chunk of streamResult.stream) {
        const text = extractText(chunk);
        if (text) {
          collectedText += text;
        }
      }
      if (!hasTranslatedOutput(collectedText, streamResult.toolCalls)) {
        lastAttemptError = `Translated provider ${attempt.label} returned no content or tool calls`;
        // An empty result is not an exception, so there is nothing to chain.
        lastAttemptCause = undefined;
        logger.debug(`[proxy] translation attempt ${attempt.label} returned no content or tool calls`);
        continue;
      }
      const internal = {
        content: collectedText,
        model: streamResult.model,
        finishReason: streamResult.finishReason ?? "end_turn",
        reasoning: undefined,
        usage: streamResult.usage
          ? extractUsageFromStreamResult(streamResult.usage)
          : undefined,
        toolCalls: streamResult.toolCalls,
      };
      if (tracer && streamResult.model && streamResult.model !== body.model) {
        tracer.setModelSubstitution(body.model, streamResult.model);
      }
      tracer?.end(200, Date.now() - requestStartTime);
      const clientResponse = serializeClaudeResponse(internal, body.model);
      const clientResponseText = JSON.stringify(clientResponse);
      logProxyBody({
        phase: "client_response",
        headers: { "content-type": "application/json" },
        body: clientResponseText,
        bodySize: Buffer.byteLength(clientResponseText, "utf8"),
        contentType: "application/json",
        responseStatus: 200,
        durationMs: Date.now() - requestStartTime,
      });
      return clientResponse;
    }
    catch (attemptError) {
      lastAttemptError =
        attemptError instanceof Error
          ? attemptError.message
          : String(attemptError);
      lastAttemptCause = attemptError;
      logger.debug(`[proxy] translation attempt ${attempt.label} failed: ${lastAttemptError}`);
    }
  }
  // Same message as before; the original error rides along as `cause`.
  throw lastAttemptCause === undefined
    ? new Error(lastAttemptError)
    : new Error(lastAttemptError, { cause: lastAttemptCause });
}
/**
 * Forward a client request to Anthropic unchanged ("passthrough" mode).
 *
 * The client's raw body is POSTed to https://api.anthropic.com/v1/messages?beta=true
 * together with the client's own headers (minus the blocked ones). No header
 * or body polyfill is applied.
 *
 * Outcomes:
 *  - fetch() throws: a 502 Claude-format error body is returned.
 *  - upstream answers non-2xx: the upstream error body is returned, parsed as
 *    JSON when possible, otherwise wrapped with buildClaudeError().
 *  - upstream answers 2xx: the response is handed to the passthrough stream
 *    handler (when `body.stream` is set and a response body exists) or to the
 *    passthrough JSON handler.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context (headers, requestId, method, path).
 * @param {object} args.body - Parsed request body (used for model/stream/tools only).
 * @param {string} args.clientRequestBody - Raw request body, forwarded as-is.
 * @param {object} [args.tracer] - Optional request tracer.
 * @param {number} args.requestStartTime - Epoch ms at which the request started.
 * @param {Function} args.logProxyBody - Body logger.
 * @returns {Promise<object | Response>} Error body, or the selected handler's result.
 */
async function handleClaudePassthroughRequest(args) {
const { ctx, body, clientRequestBody, tracer, requestStartTime, logProxyBody, } = args;
tracer?.setMode("passthrough-cli");
const bodyStr = clientRequestBody;
const toolCount = Array.isArray(body.tools) ? body.tools.length : 0;
// Copy the client's headers, dropping blocked names (matched
// case-insensitively) and headers with a falsy value.
const upstreamHeaders = {};
for (const [key, value] of Object.entries(ctx.headers)) {
if (!BLOCKED_UPSTREAM_HEADERS.has(key.toLowerCase()) && value) {
upstreamHeaders[key] = value;
}
}
if (!upstreamHeaders["content-type"]) {
upstreamHeaders["content-type"] = "application/json";
}
const upstreamSpan = tracer?.startUpstreamAttempt({
account: "passthrough",
attempt: 1,
polyfillHeaders: false,
polyfillBody: false,
upstreamUrl: "https://api.anthropic.com/v1/messages?beta=true",
});
tracer?.logUpstreamRequestHeaders(upstreamHeaders);
tracer?.logUpstreamRequestBody(bodyStr);
logProxyBody({
phase: "upstream_request",
headers: upstreamHeaders,
body: bodyStr,
bodySize: Buffer.byteLength(bodyStr, "utf8"),
contentType: upstreamHeaders["content-type"] ?? "application/json",
account: "passthrough",
accountType: "passthrough",
attempt: 1,
});
let response;
try {
response = await fetch("https://api.anthropic.com/v1/messages?beta=true", {
method: "POST",
headers: upstreamHeaders,
body: bodyStr,
signal: AbortSignal.timeout(UPSTREAM_FETCH_TIMEOUT_MS),
});
}
catch (fetchErr) {
// fetch() itself failed (this includes the timeout signal aborting the
// request): report it to the client as a 502 in Claude error format.
const errMsg = fetchErr instanceof Error ? fetchErr.message : String(fetchErr);
tracer?.setError("network_error", errMsg);
upstreamSpan?.end();
tracer?.end(502, Date.now() - requestStartTime);
logRequest({
timestamp: new Date().toISOString(),
requestId: ctx.requestId,
method: ctx.method,
path: ctx.path,
model: body.model,
stream: body.stream ?? false,
toolCount,
account: "passthrough",
accountType: "passthrough",
responseStatus: 502,
responseTimeMs: Date.now() - requestStartTime,
errorType: "network_error",
errorMessage: errMsg,
});
const errorBody = buildClaudeError(502, `Passthrough fetch failed: ${errMsg}`);
const errorBodyText = JSON.stringify(errorBody);
logProxyBody({
phase: "client_response",
headers: { "content-type": "application/json" },
body: errorBodyText,
bodySize: Buffer.byteLength(errorBodyText, "utf8"),
contentType: "application/json",
account: "passthrough",
accountType: "passthrough",
attempt: 1,
responseStatus: 502,
durationMs: Date.now() - requestStartTime,
});
return errorBody;
}
// Flatten the Headers object into a plain record for tracing and logging.
const upstreamResponseHeaders = {};
response.headers.forEach((value, key) => {
upstreamResponseHeaders[key] = value;
});
tracer?.logUpstreamResponseHeaders(upstreamResponseHeaders);
if (!response.ok) {
// Upstream rejected the request. The same error text is logged twice: once
// as what upstream sent, once as what the client receives.
const errorText = await response.text();
tracer?.logUpstreamResponseBody(errorText);
logProxyBody({
phase: "upstream_response",
headers: upstreamResponseHeaders,
body: errorText,
bodySize: Buffer.byteLength(errorText, "utf8"),
contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
account: "passthrough",
accountType: "passthrough",
attempt: 1,
responseStatus: response.status,
durationMs: Date.now() - requestStartTime,
});
logProxyBody({
phase: "client_response",
headers: upstreamResponseHeaders,
body: errorText,
bodySize: Buffer.byteLength(errorText, "utf8"),
contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
account: "passthrough",
accountType: "passthrough",
attempt: 1,
responseStatus: response.status,
durationMs: Date.now() - requestStartTime,
});
upstreamSpan?.end();
tracer?.setError("api_error", errorText.slice(0, 500));
tracer?.end(response.status, Date.now() - requestStartTime);
// Prefer the upstream error object; wrap the text when it is not JSON.
try {
return JSON.parse(errorText);
}
catch {
return buildClaudeError(response.status, errorText);
}
}
// Success: the selected handler takes over the response, and is passed the
// tracer and upstream span along with it.
if (body.stream && response.body) {
return handleClaudePassthroughStreamResponse({
ctx,
body,
bodyStr,
response,
tracer,
requestStartTime,
toolCount,
upstreamSpan,
upstreamResponseHeaders,
logProxyBody,
});
}
return handleClaudePassthroughJsonResponse({
ctx,
body,
bodyStr,
response,
tracer,
requestStartTime,
toolCount,
upstreamSpan,
upstreamResponseHeaders,
logProxyBody,
});
}
async function handleClaudePassthroughStreamResponse(args) {
const { ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody, } = args;
const responseHeaders = { ...upstreamResponseHeaders };
const { stream: clientCaptureStream, capture: clientCapture } = createRawStreamCapture();
const responseBody = response.body;
if (!responseBody) {
throw new Error("Expected passthrough stream response body");
}
let streamSource = responseBody;
if (tracer) {
try {
const { stream: interceptor, telemetry } = createSSEInterceptor({
captureRawText: true,
});
streamSource = streamSource.pipeThrough(interceptor);
const capturedTracer = tracer;
const capturedUpstreamSpan = upstreamSpan;
const capturedResponse = response;
const capturedRequestBytes = bodyStr.length;
Promise.all([telemetry, clientCapture])
.then(([data, clientBody]) => {
capturedTracer.setUsage({
inputTokens: data.usage.inputTokens,
outputTokens: data.usage.outputTokens,
cacheCreationTokens: data.usage.cacheCreationInputTokens,
cacheReadTokens: data.usage.cacheReadInputTokens,
});
capturedTracer.logStreamEvents(data.events);
const rateLimit5h = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-5h-utilization") ?? "");
const rateLimit7d = parseFloat(capturedResponse.headers.get("anthropic-ratelimit-unified-7d-utilization") ?? "");
const usageUpdate = {
inputTokens: data.usage.inputTokens,
outputTokens: data.usage.outputTokens,
cacheCreationTokens: data.usage.cacheCreationInputTokens,
cacheReadTokens: data.usage.cacheReadInputTokens,
};
if (!isNaN(rateLimit5h)) {
usageUpdate.rateLimitAfter5h = rateLimit5h;
}
if (!isNaN(rateLimit7d)) {
usageUpdate.rateLimitAfter7d = rateLimit7d;
}
if (!isNaN(rateLimit5h) || !isNaN(rateLimit7d)) {
capturedTracer.setUsage(usageUpdate);
}
capturedTracer.logUpstreamResponseBody(data.rawText ?? "");
capturedTracer.recordMetrics();
capturedTracer.recordBodySizes(capturedRequestBytes, data.totalBytesReceived);
capturedUpstreamSpan?.end();
capturedTracer.end(200, Date.now() - requestStartTime);
const traceCtx = capturedTracer.getTraceContext();
logRequest({
timestamp: new Date().toISOString(),
requestId: ctx.requestId,
method: ctx.method,
path: ctx.path,
model: body.model,
stream: true,
toolCount,
account: "passthrough",
accountType: "passthrough",
responseStatus: 200,
responseTimeMs: Date.now() - requestStartTime,
inputTokens: data.usage.inputTokens,
outputTokens: data.usage.outputTokens,
cacheCreationTokens: data.usage.cacheCreationInputTokens,
cacheReadTokens: data.usage.cacheReadInputTokens,
traceId: traceCtx.traceId,
spanId: traceCtx.spanId,
});
logProxyBody({
phase: "upstream_response",
headers: responseHeaders,
body: data.rawText ?? "",
bodySize: data.totalBytesReceived,
contentType: responseHeaders["content-type"] ?? "text/event-stream",
account: "passthrough",
accountType: "passthrough",
attempt: 1,
responseStatus: 200,
durationMs: Date.now() - requestStartTime,
});
logProxyBody({
phase: "client_response",
headers: responseHeaders,
body: clientBody.text,
bodySize: clientBody.totalBytes,
contentType: responseHeaders["content-type"] ?? "text/event-stream",
account: "passthrough",
accountType: "passthrough",
attempt: 1,
responseStatus: 200,
durationMs: Date.now() - requestStartTime,
});
})
.catch((error) => {
capturedTracer.setError("stream_error", error instanceof Error ? error.message : String(error));
capturedUpstreamSpan?.end();
capturedTracer.end(500, Date.now() - requestStartTime);
const traceCtx = capturedTracer.getTraceContext();
logRequest({
timestamp: new Date().toISOString(),
requestId: ctx.requestId,
method: ctx.method,
path: ctx.path,
model: body.model,
stream: true,
toolCount,
account: "passthrough",
accountType: "passthrough",
responseStatus: 500,
responseTimeMs: Date.now() - requestStartTime,
errorType: "stream_error",
errorMessage: error instanceof Error ? error.message : String(error),
traceId: traceCtx.traceId,
spanId: traceCtx.spanId,
});
});
}
catch {
// Streaming capture is best-effort.
}
}
else {
clientCapture
.then((clientBody) => {
logProxyBody({
phase: "upstream_response",
headers: responseHeaders,
body: clientBody.text,
bodySize: clientBody.totalBytes,
contentType: responseHeaders["content-type"] ?? "text/event-stream",
account: "passthrough",
accountType: "passthrough",
attempt: 1,
responseStatus: 200,
durationMs: Date.now() - requestStartTime,
});
logProxyBody({
phase: "client_response",
headers: responseHeaders,
body: clientBody.text,
bodySize: clientBody.totalBytes,
contentType: responseHeaders["content-type"] ?? "text/event-stream",
account: "passthrough",
accountType: "passthrough",
attempt: 1,
responseStatus: 200,
durationMs: Date.now() - requestStartTime,
});
})
.catch(() => {
// Non-fatal
});
}
const clientStream = streamSource.pipeThrough(clientCaptureStream);
return new Response(clientStream, {
status: response.status,
headers: responseHeaders,
});
}
/**
 * Handle a non-streaming (JSON) passthrough response from the upstream API.
 *
 * Reads the whole upstream body as text, logs it for the "upstream_response"
 * and "client_response" phases, parses it as JSON and, when a tracer is
 * attached and the payload is an object, records token usage, rate-limit
 * utilization, metrics and body sizes before ending the upstream span and the
 * trace. A request-log entry is written on both paths.
 *
 * @param {object} args
 * @param {object} args.ctx - Request context; `requestId`, `method` and `path` are logged.
 * @param {object} args.body - Client request payload; only `model` is read here.
 * @param {string} args.bodyStr - Serialized request body, used for the request size metric
 *   (assumed to be a string -- TODO confirm against the caller).
 * @param {object} args.response - Upstream response exposing `status`, `headers.get()` and
 *   `text()`; its body is consumed by this function.
 * @param {object} [args.tracer] - Optional tracer; when absent no usage/metrics are recorded.
 * @param {number} args.requestStartTime - `Date.now()` value used as the origin for durations.
 * @param {number} args.toolCount - Forwarded unchanged to the request log.
 * @param {object} [args.upstreamSpan] - Optional span that is ended once handling completes.
 * @param {object} args.upstreamResponseHeaders - Header map logged with the body; its
 *   "content-type" entry is used as the logged content type.
 * @param {Function} args.logProxyBody - Sink for body-capture log entries.
 * @returns {Promise<*>} The parsed JSON payload of the upstream response.
 * @throws {SyntaxError} When the upstream body is not valid JSON.
 */
async function handleClaudePassthroughJsonResponse(args) {
    const { ctx, body, bodyStr, response, tracer, requestStartTime, toolCount, upstreamSpan, upstreamResponseHeaders, logProxyBody, } = args;
    const elapsedMs = () => Date.now() - requestStartTime;
    const responseText = await response.text();
    // Byte size of what was actually received; reused for logs and metrics so
    // both report the same number.
    const responseBytes = Buffer.byteLength(responseText, "utf8");
    tracer?.logUpstreamResponseBody(responseText);
    // In passthrough mode the client receives the upstream body verbatim, so
    // both phases are logged with the same content.
    for (const phase of ["upstream_response", "client_response"]) {
        logProxyBody({
            phase,
            headers: upstreamResponseHeaders,
            body: responseText,
            bodySize: responseBytes,
            contentType: upstreamResponseHeaders["content-type"] ?? "application/json",
            account: "passthrough",
            accountType: "passthrough",
            attempt: 1,
            responseStatus: response.status,
            durationMs: elapsedMs(),
        });
    }
    // NOTE(review): a parse failure propagates from here before the span and
    // tracer are ended; presumably the caller finalizes them -- confirm.
    const responseJson = JSON.parse(responseText);
    const buildRequestLog = (extra = {}) => ({
        timestamp: new Date().toISOString(),
        requestId: ctx.requestId,
        method: ctx.method,
        path: ctx.path,
        model: body.model,
        stream: false,
        toolCount,
        account: "passthrough",
        accountType: "passthrough",
        responseStatus: response.status,
        responseTimeMs: elapsedMs(),
        ...extra,
    });
    const isObjectPayload = Boolean(responseJson) && typeof responseJson === "object";
    if (!tracer || !isObjectPayload) {
        // Nothing to attribute usage to: just close out tracing and log.
        upstreamSpan?.end();
        tracer?.end(response.status, elapsedMs());
        logRequest(buildRequestLog());
        return responseJson;
    }
    const usage = responseJson.usage;
    if (usage) {
        const usageUpdate = {
            inputTokens: usage.input_tokens ?? 0,
            outputTokens: usage.output_tokens ?? 0,
            cacheCreationTokens: usage.cache_creation_input_tokens ?? 0,
            cacheReadTokens: usage.cache_read_input_tokens ?? 0,
        };
        // A missing or non-numeric header parses to NaN and is left out.
        const readUtilization = (headerName) => parseFloat(response.headers.get(headerName) ?? "");
        const rateLimit5h = readUtilization("anthropic-ratelimit-unified-5h-utilization");
        const rateLimit7d = readUtilization("anthropic-ratelimit-unified-7d-utilization");
        if (!Number.isNaN(rateLimit5h)) {
            usageUpdate.rateLimitAfter5h = rateLimit5h;
        }
        if (!Number.isNaN(rateLimit7d)) {
            usageUpdate.rateLimitAfter7d = rateLimit7d;
        }
        // Single update carrying token counts plus any rate-limit fields.
        tracer.setUsage(usageUpdate);
    }
    tracer.recordMetrics();
    // Report sizes in UTF-8 bytes, matching the bodySize logged above, instead
    // of UTF-16 string lengths of a re-serialized copy.
    tracer.recordBodySizes(Buffer.byteLength(bodyStr, "utf8"), responseBytes);
    upstreamSpan?.end();
    tracer.end(response.status, elapsedMs());
    const traceCtx = tracer.getTraceContext();
    logRequest(buildRequestLog({
        inputTokens: usage?.input_tokens,
        outputTokens: usage?.output_tokens,
        cacheCreationTokens: usage?.cache_creation_input_tokens,
        cacheReadTokens: usage?.cache_read_input_tokens,
        traceId: traceCtx.traceId,
        spanId: traceCtx.spanId,
    }));
    return responseJson;
}
async function loadClaudeProxyAccounts(args) {
const { ctx, body, tracer, requestStartTime, accountStrategy, buildLoggedClaudeError, } = args;
const fs = await import("fs");
const os = await import("os");
const accounts = [];
const legacyCredPath = `${os.homedir()}/.neurolink/anthropic-credentials.json`;
const { tokenStore } = await import("../../auth/tokenStore.js");
if (!startupPruneDone) {
await tokenStore.pruneExpired();
startupPruneDone = true;
}
const compoundKeys = await tokenStore.listByPrefix("anthropic:");
for (const key of compoundKeys) {
if (await tokenStore.isDisabled(key)) {
const existingState = getOrCreateRuntimeState(key);
const tokens = await tokenStore.loadTokens(key);
const hasTrackedTokens = existingState.lastToken !== undefined && existingState.lastToken !== "";
const tokenChanged = tokens &&
hasTrackedTokens &&
(existingState.lastToken !== tokens.accessToken ||
existingState.lastRefreshToken !== tokens.refreshToken);
if (tokenChanged) {
await tokenStore.markEnabled(key);
logger.always(`[proxy] account=${key.split(":")[1] ?? key} re-enabled (credentials