@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
697 lines (696 loc) • 28.2 kB
JavaScript
/**
* Proxy Request Tracer
*
* Creates and manages OTel spans for the proxy request lifecycle.
* Provides a clean API for claudeProxyRoutes to trace each phase:
* receive -> account_selection -> upstream (per retry) -> stream -> end
*
* Uses the existing instrumentation infrastructure:
* - getTracer() from instrumentation.ts for span creation
* - setLangfuseContext() for Langfuse enrichment
* - OtelBridge for context propagation to/from upstream
* - Attribute names follow SpanAttributes from spanTypes.ts (written as string literals in this file)
* - calculateCost() from pricing.ts for cost tracking
* - TelemetryService for metrics recording
*/
import { SpanStatusCode, context, metrics, trace, } from "@opentelemetry/api";
import { getTracer, setLangfuseContext, } from "../services/server/ai/observability/instrumentation.js";
import { OtelBridge } from "../observability/otelBridge.js";
import { calculateCost } from "../utils/pricing.js";
import { TelemetryService } from "../telemetry/telemetryService.js";
import { logger } from "../utils/logger.js";
const LOG_PREFIX = "[ProxyTracer]";
// ---------------------------------------------------------------------------
// OTEL Metric Instruments — lazy singleton
//
// initializeOpenTelemetry() registers the MeterProvider *after* this module
// has been imported, and @opentelemetry/api v1.x getMeter() returns a
// NoopMeter when no real MeterProvider is set yet. Instruments are therefore
// created on first use (any ProxyTracer metric call or
// recordFallbackAttempt()) instead of at import time, and cached afterwards.
// ---------------------------------------------------------------------------
let _metrics = null;
/**
 * Return the shared proxy metric instruments, creating them on the first call.
 *
 * @returns {object} Map of instrument property name -> counter/histogram.
 */
function getProxyMetrics() {
  if (!_metrics) {
    // [property, meter factory method, metric name, description, unit]
    const instrumentSpecs = [
      ["requestsTotal", "createCounter", "proxy_requests_total", "Total number of proxy requests", "{request}"],
      ["requestDuration", "createHistogram", "proxy_request_duration_ms", "Proxy request duration in milliseconds", "ms"],
      ["tokensInput", "createCounter", "proxy_tokens_input", "Total input tokens consumed via proxy", "{token}"],
      ["tokensOutput", "createCounter", "proxy_tokens_output", "Total output tokens produced via proxy", "{token}"],
      ["tokensCacheRead", "createCounter", "proxy_tokens_cache_read", "Total cache-read tokens via proxy", "{token}"],
      ["tokensCacheCreation", "createCounter", "proxy_tokens_cache_creation", "Total cache-creation tokens via proxy", "{token}"],
      ["tokensReasoning", "createCounter", "proxy_tokens_reasoning", "Total reasoning tokens via proxy", "{token}"],
      ["costTotal", "createCounter", "proxy_cost_usd_total", "Total estimated cost in USD", "USD"],
      ["errorsTotal", "createCounter", "proxy_errors_total", "Total proxy errors", "{error}"],
      ["retriesTotal", "createCounter", "proxy_retries_total", "Total upstream retry attempts", "{retry}"],
      ["modelSubstitutionTotal", "createCounter", "proxy_model_substitution_total", "Total proxy requests where the response model differs from the requested model", "{substitution}"],
      ["requestBodySize", "createHistogram", "proxy_request_body_bytes", "Request body size in bytes sent upstream", "By"],
      ["responseBodySize", "createHistogram", "proxy_response_body_bytes", "Response body size in bytes received from upstream", "By"],
      ["fallbackAttemptsTotal", "createCounter", "proxy_fallback_attempts_total", "Total fallback provider attempts", "{attempt}"],
      ["fallbackSuccessTotal", "createCounter", "proxy_fallback_success_total", "Total successful fallback provider responses", "{success}"],
      ["fallbackFailureTotal", "createCounter", "proxy_fallback_failure_total", "Total failed fallback provider responses", "{failure}"],
    ];
    const meter = metrics.getMeter("neurolink.proxy", "1.0.0");
    const instruments = {};
    for (const [property, factory, metricName, description, unit] of instrumentSpecs) {
      instruments[property] = meter[factory](metricName, { description, unit });
    }
    // Publish only once every instrument exists, so a failure above leaves
    // the cache empty and the next call retries.
    _metrics = instruments;
  }
  return _metrics;
}
// ---------------------------------------------------------------------------
// Header redaction (mirrors requestLogger.ts patterns)
// ---------------------------------------------------------------------------
/** Lower-cased header names that are always masked in full. */
const SENSITIVE_HEADER_NAMES = new Set([
  "authorization",
  "proxy-authorization",
  "x-api-key",
  "cookie",
  "set-cookie",
]);
/** Any header whose name contains one of these fragments is masked too. */
const SENSITIVE_HEADER_PATTERN = /token|secret|key|password|credential/i;
/**
 * Build a copy of `headers` that is safe to attach to a span event.
 *
 * Header names keep their original casing; matching is done on the
 * lower-cased name. The input object is not modified.
 *
 * @param {object} headers - Header name -> value map.
 * @returns {object} New map where sensitive values are "[REDACTED]".
 */
function redactHeaders(headers) {
  const isSensitive = (headerName) => {
    const normalized = headerName.toLowerCase();
    return (SENSITIVE_HEADER_NAMES.has(normalized) ||
      SENSITIVE_HEADER_PATTERN.test(normalized));
  };
  return Object.entries(headers).reduce((safeHeaders, [headerName, headerValue]) => {
    safeHeaders[headerName] = isSensitive(headerName) ? "[REDACTED]" : headerValue;
    return safeHeaders;
  }, {});
}
/** JSON property names whose values are replaced before a body is logged. */
const SENSITIVE_BODY_KEYS = /api[_-]?key|token|secret|password|credential|authorization/i;
/** Body content is only attached to spans when explicitly enabled. */
const BODY_LOGGING_ENABLED = process.env.NEUROLINK_PROXY_TRACE_BODY_LOGGING === "true";
const DEFAULT_MAX_BODY_LOG_SIZE = 8192;
const configuredBodyLogSize = Number.parseInt(process.env.NEUROLINK_PROXY_TRACE_BODY_LOG_BYTES ?? "", 10);
// A missing, non-numeric or negative setting falls back to the default;
// otherwise NaN would disable the cap and a negative value would slice
// from the end of the body.
const MAX_BODY_LOG_SIZE = Number.isSafeInteger(configuredBodyLogSize) && configuredBodyLogSize >= 0
  ? configuredBodyLogSize
  : DEFAULT_MAX_BODY_LOG_SIZE;
const MAX_STREAM_EVENTS_TO_LOG = 200;
const BODY_TRUNCATION_MARKER = "…[truncated]";
/**
 * Produce a loggable version of a request/response body.
 *
 * The body is parsed as JSON in full, every property whose name matches
 * SENSITIVE_BODY_KEYS is replaced with "[REDACTED]" (at any depth), and the
 * re-serialized result is then cut to `maxLen` characters. Redacting before
 * truncating matters: cutting first leaves invalid JSON, which cannot be
 * parsed and would be logged with its secrets intact.
 *
 * Bodies that are not valid JSON cannot be redacted by key and are returned
 * as-is, cut to `maxLen` characters.
 *
 * @param {string} body - Raw body text.
 * @param {number} [maxLen=8192] - Maximum number of characters to keep.
 * @returns {string} Redacted (when JSON) and length-capped text; a
 *   "…[truncated]" marker is appended whenever text was cut.
 */
function redactBodyForLogging(body, maxLen = DEFAULT_MAX_BODY_LOG_SIZE) {
  const clip = (text) => text.length > maxLen
    ? text.slice(0, maxLen) + BODY_TRUNCATION_MARKER
    : text;
  const redact = (value) => {
    if (value === null || typeof value !== "object") {
      return value;
    }
    if (Array.isArray(value)) {
      return value.map(redact);
    }
    return Object.fromEntries(Object.entries(value).map(([key, child]) => [
      key,
      SENSITIVE_BODY_KEYS.test(key) ? "[REDACTED]" : redact(child),
    ]));
  };
  try {
    return clip(JSON.stringify(redact(JSON.parse(body))));
  }
  catch {
    // Not JSON (or too deeply nested to walk): key-based redaction is not
    // possible, so only the length cap is applied.
    return clip(body);
  }
}
/**
 * Assemble the span-event attributes used when a body is logged.
 *
 * The logged text comes from redactBodyForLogging() capped at
 * MAX_BODY_LOG_SIZE; the size and truncation flag are derived from the
 * length of the raw body.
 *
 * @param {string} body - Raw body text.
 * @returns {object} Attributes for the span event.
 */
function buildBodyEventAttributes(body) {
  const loggableBody = redactBodyForLogging(body, MAX_BODY_LOG_SIZE);
  const rawLength = body.length;
  const attributes = {};
  attributes["proxy.body"] = loggableBody;
  attributes["proxy.body.size"] = rawLength;
  attributes["proxy.body.logged"] = true;
  attributes["proxy.body.truncated"] = rawLength > MAX_BODY_LOG_SIZE;
  return attributes;
}
// ---------------------------------------------------------------------------
// Client app detection
// ---------------------------------------------------------------------------
/**
 * Classify the calling client from its User-Agent prefix.
 *
 * @param {string|undefined} userAgent - Raw User-Agent header value.
 * @returns {"cli"|"sdk"|"unknown"} "cli" for "claude-cli/…", "sdk" for
 *   "ai/…", otherwise (including a missing/empty value) "unknown".
 */
function detectClientApp(userAgent) {
  const knownPrefixes = [
    ["claude-cli/", "cli"],
    ["ai/", "sdk"],
  ];
  if (userAgent) {
    for (const [prefix, app] of knownPrefixes) {
      if (userAgent.startsWith(prefix)) {
        return app;
      }
    }
  }
  return "unknown";
}
// ---------------------------------------------------------------------------
// ProxyTracer
// ---------------------------------------------------------------------------
/**
 * Request-scoped tracing facade for the proxy.
 *
 * An instance wraps the root "proxy.request" span of one request and offers
 * helpers to open child spans, attach attributes and payload events to the
 * root span, and emit metrics when the request ends.
 */
class ProxyTracer {
  rootSpan;
  proxyTracer = getTracer("neurolink.proxy");
  bridge = new OtelBridge();
  requestId;
  model;
  startTime;
  isStream;
  accountEmail;
  usage;
  mode = "full";

  /**
   * @param rootSpan - Already-started root span for the request.
   * @param requestId - Proxy request identifier stamped on child spans.
   * @param model - Requested model name, used for labels and cost lookup.
   * @param stream - Whether the request is a streaming request.
   */
  constructor(rootSpan, requestId, model, stream) {
    this.rootSpan = rootSpan;
    this.requestId = requestId;
    this.model = model;
    this.isStream = stream;
    // Captured here so recordMetrics() can derive an elapsed time.
    this.startTime = Date.now();
  }

  /**
   * Start the root span for a proxy request and seed the Langfuse context.
   *
   * When `incomingHeaders` yields a span context via
   * OtelBridge.extractContext(), the root span is started under it so it
   * joins the caller's trace.
   *
   * @param ctx - Request description (requestId, method, path, model,
   *   stream, toolCount, optional sessionId / userAgent / clientApp).
   * @param [incomingHeaders] - Incoming request headers, if available.
   * @returns {ProxyTracer} Tracer bound to the new root span.
   */
  static startRequest(ctx, incomingHeaders) {
    const tracer = getTracer("neurolink.proxy");
    let parentContext = context.active();
    const callerSpanContext = incomingHeaders
      ? new OtelBridge().extractContext(incomingHeaders)
      : undefined;
    if (callerSpanContext) {
      // Parent the root span on the caller's (remote) span context.
      parentContext = trace.setSpanContext(context.active(), callerSpanContext);
    }
    const clientApp = ctx.clientApp ?? detectClientApp(ctx.userAgent);
    const rootSpan = tracer.startSpan("proxy.request", {
      attributes: {
        "proxy.request_id": ctx.requestId,
        "http.method": ctx.method,
        "http.target": ctx.path,
        "gen_ai.request.model": ctx.model,
        "proxy.stream": ctx.stream,
        "proxy.tool_count": ctx.toolCount,
        "proxy.client_app": clientApp,
      },
    }, parentContext);
    // x-neurolink-* headers carry context supplied by the calling SDK.
    const nlSessionId = incomingHeaders?.["x-neurolink-session-id"];
    const nlUserId = incomingHeaders?.["x-neurolink-user-id"];
    const nlConversationId = incomingHeaders?.["x-neurolink-conversation-id"];
    // Each of these is only written when a truthy value is present.
    const conditionalAttributes = [
      ["session.id", ctx.sessionId],
      ["http.user_agent", ctx.userAgent],
      ["neurolink.session_id", nlSessionId],
      ["neurolink.user_id", nlUserId],
      ["neurolink.conversation_id", nlConversationId],
    ];
    for (const [attributeName, attributeValue] of conditionalAttributes) {
      if (attributeValue) {
        rootSpan.setAttribute(attributeName, attributeValue);
      }
    }
    const instance = new ProxyTracer(rootSpan, ctx.requestId, ctx.model, ctx.stream);
    // Fire-and-forget: a Langfuse failure is logged at debug level only.
    // The SDK-provided session id wins over the request's own session id.
    setLangfuseContext({
      sessionId: nlSessionId ?? ctx.sessionId,
      userId: nlUserId,
      conversationId: nlConversationId,
      requestId: ctx.requestId,
      traceName: `proxy:${ctx.model}`,
      operationName: "proxy.request",
      metadata: {
        clientApp,
        stream: ctx.stream,
        toolCount: ctx.toolCount,
      },
    }).catch((err) => {
      const error = err instanceof Error ? err.message : String(err);
      logger.debug(`${LOG_PREFIX} Failed to set Langfuse context`, { error });
    });
    return instance;
  }

  // -------------------------------------------------------------------------
  // Child spans
  // -------------------------------------------------------------------------

  /** Open the child span for the receive/parse phase. */
  startReceive() {
    return startProxyChildSpan(this, "proxy.receive", {
      "proxy.request_id": this.requestId,
    });
  }

  /** Open the child span for account selection. */
  startAccountSelection() {
    return startProxyChildSpan(this, "proxy.account_selection", {
      "proxy.request_id": this.requestId,
    });
  }

  /**
   * Open the child span for one upstream attempt (one span per retry).
   *
   * @param ctx - attempt, account, polyfillHeaders, polyfillBody, upstreamUrl.
   */
  startUpstreamAttempt(ctx) {
    return startProxyChildSpan(this, "proxy.upstream", {
      "proxy.request_id": this.requestId,
      "proxy.upstream.attempt": ctx.attempt,
      "proxy.upstream.account": ctx.account,
      "proxy.upstream.polyfill_headers": ctx.polyfillHeaders,
      "proxy.upstream.polyfill_body": ctx.polyfillBody,
      "http.url": ctx.upstreamUrl,
    });
  }

  /** Open the child span for the SSE stream relay phase. */
  startStream() {
    return startProxyChildSpan(this, "proxy.stream", {
      "proxy.request_id": this.requestId,
      "gen_ai.request.model": this.model,
    });
  }

  // -------------------------------------------------------------------------
  // Attribute setters
  // -------------------------------------------------------------------------

  /**
   * Store the selected account and write the selection outcome on the root
   * span; the account is also pushed to Langfuse as the user id.
   */
  setAccountSelection(ctx) {
    const { selectedAccount } = ctx;
    this.accountEmail = selectedAccount;
    this.rootSpan.setAttributes({
      "proxy.account.strategy": ctx.strategy,
      "proxy.account.total": ctx.accountsTotal,
      "proxy.account.healthy": ctx.accountsHealthy,
      "proxy.account.selected": selectedAccount,
      "proxy.account.type": ctx.accountType,
    });
    const rateLimitsBefore = [
      ["proxy.ratelimit.before.5h", ctx.rateLimitBefore5h],
      ["proxy.ratelimit.before.7d", ctx.rateLimitBefore7d],
    ];
    for (const [attributeName, utilisation] of rateLimitsBefore) {
      if (utilisation !== undefined) {
        this.rootSpan.setAttribute(attributeName, utilisation);
      }
    }
    setLangfuseContext({ userId: selectedAccount }).catch(() => {
      // Non-fatal
    });
  }

  /**
   * Remember the usage figures and write token counts, estimated cost and
   * post-request rate-limit utilisation on the root span.
   */
  setUsage(ctx) {
    this.usage = ctx;
    const totalTokens = sumUsageTokens(ctx);
    // NeuroLink-style token attributes.
    this.rootSpan.setAttributes({
      "ai.tokens.input": ctx.inputTokens,
      "ai.tokens.output": ctx.outputTokens,
      "ai.tokens.total": totalTokens,
      "ai.tokens.cache_creation": ctx.cacheCreationTokens,
      "ai.tokens.cache_read": ctx.cacheReadTokens,
    });
    if (ctx.reasoningTokens !== undefined) {
      this.rootSpan.setAttribute("ai.tokens.reasoning", ctx.reasoningTokens);
    }
    // gen_ai.* equivalents of the same counts.
    this.rootSpan.setAttributes({
      "gen_ai.usage.input_tokens": ctx.inputTokens,
      "gen_ai.usage.output_tokens": ctx.outputTokens,
      "gen_ai.usage.total_tokens": totalTokens,
    });
    const cost = estimateAnthropicCost(this.model, ctx, totalTokens);
    if (cost > 0) {
      this.rootSpan.setAttributes({
        "ai.cost.total": cost,
        "ai.cost.currency": "USD",
      });
    }
    const rateLimitsAfter = [
      ["proxy.ratelimit.after.5h", ctx.rateLimitAfter5h],
      ["proxy.ratelimit.after.7d", ctx.rateLimitAfter7d],
    ];
    for (const [attributeName, utilisation] of rateLimitsAfter) {
      if (utilisation !== undefined) {
        this.rootSpan.setAttribute(attributeName, utilisation);
      }
    }
  }

  /** Write error type/message attributes on the root span. */
  setError(errorType, errorMessage) {
    const errorAttributes = {
      "error.type": errorType,
      "error.message": errorMessage,
      error: true,
    };
    this.rootSpan.setAttributes(errorAttributes);
  }

  /** Remember the handling mode and mirror it on the root span. */
  setMode(mode) {
    this.mode = mode;
    this.rootSpan.setAttribute("proxy.mode", mode);
  }

  /**
   * Note that a different model than the requested one was used: sets the
   * substitution attributes and bumps the substitution counter.
   */
  setModelSubstitution(requestedModel, actualModel) {
    this.rootSpan.setAttributes({
      "proxy.model_substituted": true,
      "proxy.original_model": requestedModel,
      "proxy.actual_model": actualModel,
      "gen_ai.response.model": actualModel,
    });
    getProxyMetrics().modelSubstitutionTotal.add(1, {
      requested_model: requestedModel,
      actual_model: actualModel,
    });
  }

  /**
   * Write fallback details on the root span. Provider and model are only
   * written when truthy. Does nothing when there is no root span.
   */
  setFallbackInfo(info) {
    if (!this.rootSpan) {
      return;
    }
    const fallbackAttributes = { "proxy.fallback.triggered": info.triggered };
    if (info.provider) {
      fallbackAttributes["proxy.fallback.provider"] = info.provider;
    }
    if (info.model) {
      fallbackAttributes["proxy.fallback.model"] = info.model;
    }
    fallbackAttributes["proxy.fallback.attempt_count"] = info.attemptCount;
    fallbackAttributes["proxy.fallback.reason"] = info.reason;
    this.rootSpan.setAttributes(fallbackAttributes);
  }

  // -------------------------------------------------------------------------
  // Log payloads as span events
  // -------------------------------------------------------------------------

  /** Add a span event for the incoming client request body. */
  logRequestBody(body) {
    addBodyEvent(this.rootSpan, "proxy.client.request_body", body);
  }

  /** Add a span event with the redacted incoming client request headers. */
  logRequestHeaders(headers) {
    addHeadersEvent(this.rootSpan, "proxy.client.request_headers", headers);
  }

  /** Add a span event for the body sent upstream. */
  logUpstreamRequestBody(body) {
    addBodyEvent(this.rootSpan, "proxy.upstream.request_body", body);
  }

  /** Add a span event with the redacted headers sent upstream. */
  logUpstreamRequestHeaders(headers) {
    addHeadersEvent(this.rootSpan, "proxy.upstream.request_headers", headers);
  }

  /** Add a span event with the redacted upstream response headers. */
  logUpstreamResponseHeaders(headers) {
    addHeadersEvent(this.rootSpan, "proxy.upstream.response_headers", headers);
  }

  /** Add a span event for the upstream response body. */
  logUpstreamResponseBody(body) {
    addBodyEvent(this.rootSpan, "proxy.upstream.response_body", body);
  }

  /**
   * Add a span event describing the relayed stream events.
   *
   * With body logging disabled only the event count is recorded. Otherwise
   * at most MAX_STREAM_EVENTS_TO_LOG events are serialized, each with its
   * `data` passed through redactBodyForLogging(); a synthetic "truncated"
   * entry is appended when events were dropped.
   */
  logStreamEvents(events) {
    const eventCount = events.length;
    if (!BODY_LOGGING_ENABLED) {
      this.rootSpan.addEvent("proxy.stream.events", {
        "proxy.stream.event_count": eventCount,
        "proxy.body.logged": false,
      });
      return;
    }
    const truncated = eventCount > MAX_STREAM_EVENTS_TO_LOG;
    const loggedEvents = events.slice(0, MAX_STREAM_EVENTS_TO_LOG);
    const redactedEvents = loggedEvents.map((event) => {
      const data = event.data
        ? redactBodyForLogging(event.data, MAX_BODY_LOG_SIZE)
        : "";
      return { ...event, data };
    });
    if (truncated) {
      redactedEvents.push({
        type: "truncated",
        timestamp: Date.now(),
        data: "…[truncated]",
      });
    }
    this.rootSpan.addEvent("proxy.stream.events", {
      "proxy.stream.event_count": eventCount,
      "proxy.stream.events": JSON.stringify(redactedEvents),
      "proxy.body.logged": true,
      "proxy.body.truncated": truncated,
    });
  }

  // -------------------------------------------------------------------------
  // Metric recording helpers
  // -------------------------------------------------------------------------

  /** Count one upstream retry, labelled with model, account and reason. */
  recordRetry(account, reason) {
    getProxyMetrics().retriesTotal.add(1, {
      model: this.model,
      account,
      reason,
    });
  }

  /**
   * Record request and/or response body sizes. A side is skipped when its
   * value is undefined or not greater than zero.
   */
  recordBodySizes(requestBytes, responseBytes) {
    const instruments = getProxyMetrics();
    const labels = {
      model: this.model,
      account: this.accountEmail ?? "unknown",
    };
    const isRecordable = (bytes) => bytes !== undefined && bytes > 0;
    if (isRecordable(requestBytes)) {
      instruments.requestBodySize.record(requestBytes, labels);
    }
    if (isRecordable(responseBytes)) {
      instruments.responseBodySize.record(responseBytes, labels);
    }
  }

  // -------------------------------------------------------------------------
  // Context accessors
  // -------------------------------------------------------------------------

  /** Trace and span ids of the root span, e.g. for log correlation. */
  getTraceContext() {
    const { traceId, spanId } = this.rootSpan.spanContext();
    return { traceId, spanId };
  }

  /** Usage object last passed to setUsage(), or undefined. */
  getUsage() {
    return this.usage;
  }

  // -------------------------------------------------------------------------
  // Lifecycle
  // -------------------------------------------------------------------------

  /**
   * Finish the root span and emit the per-request OTEL metrics.
   *
   * A status of 400 or above marks the span as ERROR, anything else as OK.
   *
   * @param responseStatus - Final HTTP status code.
   * @param durationMs - Request duration in milliseconds.
   */
  end(responseStatus, durationMs) {
    const finalAttributes = {
      "http.status_code": responseStatus,
      "proxy.duration_ms": durationMs,
      "proxy.mode": this.mode,
    };
    if (this.accountEmail) {
      finalAttributes["proxy.account"] = this.accountEmail;
    }
    this.rootSpan.setAttributes(finalAttributes);
    const failed = responseStatus >= 400;
    this.rootSpan.setStatus(failed
      ? { code: SpanStatusCode.ERROR, message: `HTTP ${responseStatus}` }
      : { code: SpanStatusCode.OK });
    this.rootSpan.end();

    // ---- Emit OTEL metrics (instruments are created lazily) ----
    const instruments = getProxyMetrics();
    const account = this.accountEmail ?? "unknown";
    const status = String(responseStatus);
    const requestLabels = {
      model: this.model,
      account,
      status,
      stream: String(this.isStream),
      mode: this.mode,
    };
    instruments.requestsTotal.add(1, requestLabels);
    instruments.requestDuration.record(durationMs, requestLabels);

    // Token and cost metrics exist only when setUsage() was called.
    const usage = this.usage;
    if (usage) {
      const tokenLabels = { model: this.model, account };
      instruments.tokensInput.add(usage.inputTokens, tokenLabels);
      instruments.tokensOutput.add(usage.outputTokens, tokenLabels);
      instruments.tokensCacheRead.add(usage.cacheReadTokens, tokenLabels);
      instruments.tokensCacheCreation.add(usage.cacheCreationTokens, tokenLabels);
      if (usage.reasoningTokens) {
        instruments.tokensReasoning.add(usage.reasoningTokens, tokenLabels);
      }
      const cost = estimateAnthropicCost(this.model, usage, sumUsageTokens(usage));
      if (cost > 0) {
        instruments.costTotal.add(cost, tokenLabels);
      }
    }

    if (failed) {
      instruments.errorsTotal.add(1, {
        model: this.model,
        account,
        error_type: classifyHttpError(responseStatus),
        status,
      });
    }
  }

  /**
   * Report the request to TelemetryService. Does nothing until setUsage()
   * has been called; duration is measured from construction time.
   */
  recordMetrics() {
    const usage = this.usage;
    if (!usage) {
      return;
    }
    const totalTokens = sumUsageTokens(usage);
    const durationMs = Date.now() - this.startTime;
    const cost = estimateAnthropicCost(this.model, usage, totalTokens);
    const reportedCost = cost > 0 ? cost : undefined;
    TelemetryService.getInstance().recordAIRequest("anthropic", this.model, totalTokens, durationMs, reportedCost);
  }

  // -------------------------------------------------------------------------
  // Context propagation
  // -------------------------------------------------------------------------

  /**
   * Build a fresh header map carrying this request's trace context, as
   * produced by OtelBridge.injectContext() for the root span.
   */
  getTraceHeaders() {
    const rootContext = trace.setSpan(context.active(), this.rootSpan);
    return this.bridge.injectContext({}, rootContext);
  }
}

/** Start a span named `spanName` as a child of `owner.rootSpan`. */
function startProxyChildSpan(owner, spanName, attributes) {
  return owner.proxyTracer.startSpan(spanName, { attributes }, trace.setSpan(context.active(), owner.rootSpan));
}

/** Sum of input, output, cache-creation, cache-read and reasoning tokens. */
function sumUsageTokens(usage) {
  return usage.inputTokens +
    usage.outputTokens +
    usage.cacheCreationTokens +
    usage.cacheReadTokens +
    (usage.reasoningTokens ?? 0);
}

/** Ask calculateCost() for the "anthropic" price of the given usage. */
function estimateAnthropicCost(model, usage, totalTokens) {
  return calculateCost("anthropic", model, {
    input: usage.inputTokens,
    output: usage.outputTokens,
    total: totalTokens,
    cacheCreationTokens: usage.cacheCreationTokens,
    cacheReadTokens: usage.cacheReadTokens,
  });
}

/** Add a body event: full attributes when body logging is on, size only otherwise. */
function addBodyEvent(span, eventName, body) {
  span.addEvent(eventName, BODY_LOGGING_ENABLED
    ? { ...buildBodyEventAttributes(body) }
    : { "proxy.body.size": body.length, "proxy.body.logged": false });
}

/** Add an event carrying the redacted headers serialized as JSON. */
function addHeadersEvent(span, eventName, headers) {
  span.addEvent(eventName, {
    "proxy.headers": JSON.stringify(redactHeaders(headers)),
  });
}

/** Map an HTTP error status to the error_type metric label. */
function classifyHttpError(status) {
  if (status === 429) {
    return "rate_limit";
  }
  if (status === 401) {
    return "auth";
  }
  return status >= 500 ? "server" : "client";
}
/**
 * Count one fallback-provider attempt and its outcome.
 *
 * Always bumps the attempts counter, then either the success counter (when
 * `attrs.status === "success"`) or the failure counter, the latter with an
 * extra `error` label holding the first 100 characters of
 * `attrs.errorMessage` ("unknown" when absent). Any exception is swallowed
 * on purpose: metrics must never break the request path.
 *
 * @param attrs - provider, model, status and optional errorMessage.
 */
export function recordFallbackAttempt(attrs) {
  try {
    const instruments = getProxyMetrics();
    const { provider, model } = attrs;
    const baseLabels = { provider, model };
    instruments.fallbackAttemptsTotal.add(1, baseLabels);
    if (attrs.status !== "success") {
      const error = attrs.errorMessage?.slice(0, 100) ?? "unknown";
      instruments.fallbackFailureTotal.add(1, { ...baseLabels, error });
      return;
    }
    instruments.fallbackSuccessTotal.add(1, baseLabels);
  }
  catch {
    // metrics are best-effort
  }
}
export { ProxyTracer };