UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

github.com/juspay/neurolink

juspay/neurolink

433 lines (432 loc) • 15.6 kB

JavaScript

/**
 * SSE Stream Interceptor
 *
 * A pass-through TransformStream that taps Anthropic SSE streaming responses
 * to extract telemetry data (token usage, model info, content blocks, thinking
 * blocks, tool use) while forwarding every chunk to the client unmodified
 * and before any parsing happens.
 *
 * The interceptor buffers partial SSE events internally (chunks may split
 * across event boundaries) but never holds back any bytes from the readable
 * side of the stream.
 *
 * Usage:
 *   const { stream, telemetry } = createSSEInterceptor();
 *   upstreamResponse.body.pipeThrough(stream).pipeTo(clientWritable);
 *   const data = await telemetry; // resolves on stream end, abort, or cancel
 */

// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------

/** Maximum accumulated content per block before we stop appending (100 KB). */
const MAX_BLOCK_CONTENT_BYTES = 100 * 1024;

/** Maximum number of events to record in the event log to cap memory usage. */
const MAX_EVENT_LOG_ENTRIES = 5000;

/** Maximum UTF-8 size of the `data` string stored per event-log entry. */
const MAX_EVENT_DATA_BYTES = 2048;

/** Maximum UTF-8 size of the optional raw-text capture (1 MB). */
const MAX_RAW_TEXT_BYTES = 1024 * 1024;

/** Suffix appended wherever captured content has been cut short. */
const TRUNCATION_MARKER = "...[TRUNCATED]";

/**
 * Maps a `content_block_delta` delta type to the delta property carrying the
 * fragment and the content-block property that accumulates it. A Map (rather
 * than a plain object) keeps lookups with untrusted `delta.type` values from
 * ever hitting inherited prototype members.
 */
const DELTA_FIELD_MAP = new Map([
  ["text_delta", { deltaField: "text", blockField: "text" }],
  ["thinking_delta", { deltaField: "thinking", blockField: "thinking" }],
  ["input_json_delta", { deltaField: "partial_json", blockField: "toolInput" }],
]);

// ---------------------------------------------------------------------------
// Internal SSE line parser
// ---------------------------------------------------------------------------

/**
 * Incrementally parse SSE events from a growing buffer of text.
 *
 * SSE events are separated by a blank line. Each event consists of field
 * lines (`event: ...`, `data: ...`). Complete events are consumed and
 * returned; any trailing partial event is handed back as `remainder`.
 *
 * CRLF line endings are normalised to LF first so that `\r\n\r\n` boundaries
 * are recognised. A lone trailing `\r` (a CRLF split across two chunks) is
 * left in the remainder and gets normalised once its `\n` arrives.
 *
 * @param {string} buffer - Decoded text accumulated so far.
 * @returns {{ events: Array<{ event: string, data: string }>, remainder: string }}
 *   Parsed events plus the unconsumed tail (with CRLF already normalised).
 */
function extractSSEEvents(buffer) {
  const events = [];
  const text = buffer.includes("\r") ? buffer.replace(/\r\n/g, "\n") : buffer;

  let cursor = 0;
  while (cursor < text.length) {
    const boundary = text.indexOf("\n\n", cursor);
    if (boundary === -1) {
      // No more complete events — everything from cursor onward is partial.
      break;
    }
    const rawBlock = text.slice(cursor, boundary);
    cursor = boundary + 2; // skip past the blank line

    let eventType = "";
    const dataLines = [];
    for (const line of rawBlock.split("\n")) {
      if (line.startsWith("event:")) {
        eventType = line.slice(6).trim();
      } else if (line.startsWith("data:")) {
        // Strip the single optional space after the colon; keep the rest as-is.
        const value = line.slice(5);
        dataLines.push(value.startsWith(" ") ? value.slice(1) : value);
      }
    }

    // Multiple `data:` lines in one event are joined with newlines.
    const dataValue = dataLines.join("\n");
    if (eventType || dataValue) {
      events.push({ event: eventType, data: dataValue });
    }
  }

  return { events, remainder: text.slice(cursor) };
}

// ---------------------------------------------------------------------------
// Telemetry accumulator
// ---------------------------------------------------------------------------

/**
 * Build the mutable state object that event processors write into.
 *
 * @param {boolean} captureRawText - When truthy, decoded stream text is also
 *   collected (capped at MAX_RAW_TEXT_BYTES) into `rawTextChunks`.
 * @returns {object} Fresh accumulator with zeroed counters and `startTime`
 *   set to the current time.
 */
function createAccumulator(captureRawText) {
  return {
    messageId: "",
    model: "",
    inputTokens: 0,
    outputTokens: 0,
    cacheCreationInputTokens: 0,
    cacheReadInputTokens: 0,
    contentBlocks: [],
    blockByteCounts: new Map(),
    stopReason: null,
    stopSequence: null,
    eventCount: 0,
    startTime: Date.now(),
    totalBytesReceived: 0,
    events: [],
    rawTextChunks: captureRawText ? [] : undefined,
    rawTextBytes: 0,
    rawTextTruncated: false,
    eventLogTruncated: false,
  };
}

/**
 * @param {string} input
 * @returns {number} Size of `input` in bytes when encoded as UTF-8.
 */
function utf8ByteLength(input) {
  return Buffer.byteLength(input, "utf8");
}

/**
 * Shorten `input` so that it, including the truncation marker, fits within
 * `maxBytes` of UTF-8. Iterates by code point so multi-byte characters are
 * never split.
 *
 * @param {string} input
 * @param {number} maxBytes
 * @returns {string} `input` unchanged when it already fits; an empty string
 *   when `maxBytes` cannot even hold the marker; otherwise a prefix of
 *   `input` followed by TRUNCATION_MARKER.
 */
function truncateUtf8String(input, maxBytes) {
  if (utf8ByteLength(input) <= maxBytes) {
    return input;
  }
  const markerBytes = utf8ByteLength(TRUNCATION_MARKER);
  if (maxBytes <= 0 || maxBytes < markerBytes) {
    return "";
  }
  let output = "";
  let usedBytes = 0;
  for (const char of input) {
    const charBytes = utf8ByteLength(char);
    if (usedBytes + charBytes + markerBytes > maxBytes) {
      break;
    }
    output += char;
    usedBytes += charBytes;
  }
  return `${output}${TRUNCATION_MARKER}`;
}

/** Alias of {@link truncateUtf8String}. */
function truncateString(input, maxBytes) {
  return truncateUtf8String(input, maxBytes);
}

/**
 * Append `fragment` to `current` while respecting a byte cap.
 *
 * @param {string | undefined} current - Content accumulated so far.
 * @param {string} fragment - New piece to append.
 * @param {number} currentBytes - Bytes counted so far for this content
 *   (keeps growing past the cap so callers can tell the cap was hit).
 * @param {number} maxBytes - Cap on appended content.
 * @returns {{ value: string, nextBytes: number }} The new content and the
 *   updated byte counter. Whenever anything is dropped the value ends with
 *   TRUNCATION_MARKER.
 */
function appendCappedFragment(current, fragment, currentBytes, maxBytes) {
  const fragmentBytes = utf8ByteLength(fragment);
  const nextBytes = currentBytes + fragmentBytes;

  if (currentBytes >= maxBytes) {
    return {
      value:
        current && current.endsWith(TRUNCATION_MARKER)
          ? current
          : `${current ?? ""}${TRUNCATION_MARKER}`,
      nextBytes,
    };
  }

  const remainingBytes = maxBytes - currentBytes;
  if (fragmentBytes <= remainingBytes) {
    return { value: `${current ?? ""}${fragment}`, nextBytes };
  }

  // The fragment does not fit. truncateUtf8String returns "" when fewer bytes
  // remain than the marker needs; fall back to the bare marker so truncation
  // is never silent.
  const clipped = truncateUtf8String(fragment, remainingBytes) || TRUNCATION_MARKER;
  return { value: `${current ?? ""}${clipped}`, nextBytes };
}

/**
 * Record decoded stream text for the optional raw-text capture, stopping
 * (with a visible marker) once MAX_RAW_TEXT_BYTES is reached.
 *
 * @param {object} acc - Accumulator from createAccumulator.
 * @param {string} chunk - Decoded text of the latest stream chunk.
 */
function appendRawTextChunk(acc, chunk) {
  if (!acc.rawTextChunks || acc.rawTextTruncated) {
    return;
  }
  const remainingBytes = MAX_RAW_TEXT_BYTES - acc.rawTextBytes;
  if (remainingBytes <= 0) {
    acc.rawTextChunks.push(TRUNCATION_MARKER);
    acc.rawTextTruncated = true;
    return;
  }
  const chunkBytes = utf8ByteLength(chunk);
  if (chunkBytes <= remainingBytes) {
    acc.rawTextChunks.push(chunk);
    acc.rawTextBytes += chunkBytes;
    return;
  }
  // Same fallback as appendCappedFragment: always leave a marker behind.
  acc.rawTextChunks.push(truncateUtf8String(chunk, remainingBytes) || TRUNCATION_MARKER);
  acc.rawTextBytes = MAX_RAW_TEXT_BYTES;
  acc.rawTextTruncated = true;
}

/**
 * @param {object} block - Content-block entry.
 * @returns {number} UTF-8 size of whichever of `text`, `thinking`, or
 *   `toolInput` is set first (0 when none is).
 */
function getBlockContentBytes(block) {
  return utf8ByteLength(block.text ?? block.thinking ?? block.toolInput ?? "");
}

/**
 * Whether `value` is usable as a token count.
 *
 * @param {unknown} value
 * @returns {boolean} True for finite numbers only.
 */
function isTokenCount(value) {
  return typeof value === "number" && Number.isFinite(value);
}

/**
 * Coerce a possibly missing or malformed token count to a number.
 *
 * @param {unknown} value
 * @returns {number} `value` when it is a finite number, otherwise 0.
 */
function toTokenCount(value) {
  return isTokenCount(value) ? value : 0;
}

/**
 * Snapshot the accumulator into the telemetry result object.
 *
 * @param {object} acc - Accumulator from createAccumulator.
 * @returns {object} Telemetry summary; `rawText` is present only when raw
 *   text capture was enabled.
 */
function finalize(acc) {
  const totalTokens = acc.inputTokens + acc.outputTokens;
  return {
    messageId: acc.messageId,
    model: acc.model,
    usage: {
      inputTokens: acc.inputTokens,
      outputTokens: acc.outputTokens,
      cacheCreationInputTokens: acc.cacheCreationInputTokens,
      cacheReadInputTokens: acc.cacheReadInputTokens,
      totalTokens,
    },
    contentBlocks: acc.contentBlocks,
    stopReason: acc.stopReason,
    stopSequence: acc.stopSequence,
    eventCount: acc.eventCount,
    streamDurationMs: Date.now() - acc.startTime,
    totalBytesReceived: acc.totalBytesReceived,
    events: acc.events,
    ...(acc.rawTextChunks ? { rawText: acc.rawTextChunks.join("") } : {}),
  };
}

// ---------------------------------------------------------------------------
// Event processors
// ---------------------------------------------------------------------------
/* eslint-disable @typescript-eslint/no-explicit-any */

/**
 * Handle `message_start`: capture message id, model, and initial usage.
 *
 * @param {object} acc - Accumulator to update.
 * @param {object} parsed - Parsed JSON payload of the event.
 */
function processMessageStart(acc, parsed) {
  const msg = parsed.message;
  if (!msg) {
    return;
  }
  acc.messageId = msg.id ?? "";
  acc.model = msg.model ?? "";

  const usage = msg.usage;
  if (usage) {
    acc.inputTokens += toTokenCount(usage.input_tokens);
    acc.outputTokens += toTokenCount(usage.output_tokens);
    acc.cacheCreationInputTokens += toTokenCount(usage.cache_creation_input_tokens);
    acc.cacheReadInputTokens += toTokenCount(usage.cache_read_input_tokens);
  }
}

/**
 * Handle `content_block_start`: register a new content block entry and seed
 * its byte counter.
 *
 * @param {object} acc - Accumulator to update.
 * @param {object} parsed - Parsed JSON payload of the event.
 */
function processContentBlockStart(acc, parsed) {
  const index = parsed.index ?? 0;
  const block = parsed.content_block;
  if (!block) {
    return;
  }

  const blockType = block.type;
  const entry = { index, type: blockType };
  if (blockType === "text") {
    entry.text = block.text ?? "";
  } else if (blockType === "thinking") {
    entry.thinking = block.thinking ?? "";
  } else if (blockType === "tool_use") {
    entry.toolName = block.name ?? "";
    entry.toolId = block.id ?? "";
    entry.toolInput = "";
  }

  acc.contentBlocks.push(entry);
  acc.blockByteCounts.set(index, getBlockContentBytes(entry));
}

/**
 * Handle `content_block_delta`: append the delta fragment to the matching
 * block, capped at MAX_BLOCK_CONTENT_BYTES.
 *
 * Deltas whose fragment is missing or not a string are ignored; passing them
 * on would make the byte-length helper throw and error the client's stream.
 *
 * @param {object} acc - Accumulator to update.
 * @param {object} parsed - Parsed JSON payload of the event.
 */
function processContentBlockDelta(acc, parsed) {
  const index = parsed.index ?? 0;
  const delta = parsed.delta;
  if (!delta) {
    return;
  }

  const block = acc.contentBlocks.find((candidate) => candidate.index === index);
  if (!block) {
    return;
  }

  const mapping = DELTA_FIELD_MAP.get(delta.type);
  if (!mapping) {
    return;
  }
  const fragment = delta[mapping.deltaField];
  if (typeof fragment !== "string") {
    return;
  }

  const currentBytes = acc.blockByteCounts.get(index) ?? 0;
  const alreadyCapped = currentBytes >= MAX_BLOCK_CONTENT_BYTES;
  const current = block[mapping.blockField];
  const updated = appendCappedFragment(current, fragment, currentBytes, MAX_BLOCK_CONTENT_BYTES);

  acc.blockByteCounts.set(index, updated.nextBytes);
  // Once capped and marked, leave the stored content alone.
  if (!alreadyCapped || !current?.endsWith(TRUNCATION_MARKER)) {
    block[mapping.blockField] = updated.value;
  }
}

/**
 * Handle `message_delta`: capture stop reason/sequence and final usage.
 *
 * Anthropic's streaming documentation states that the usage counts on
 * `message_delta` are cumulative, so they replace the values seen in
 * `message_start` instead of being added to them.
 *
 * @param {object} acc - Accumulator to update.
 * @param {object} parsed - Parsed JSON payload of the event.
 */
function processMessageDelta(acc, parsed) {
  const delta = parsed.delta;
  if (delta) {
    acc.stopReason = delta.stop_reason ?? acc.stopReason;
    acc.stopSequence = delta.stop_sequence ?? acc.stopSequence;
  }

  const usage = parsed.usage;
  if (!usage) {
    return;
  }
  if (isTokenCount(usage.output_tokens)) {
    acc.outputTokens = usage.output_tokens;
  }
  if (isTokenCount(usage.input_tokens)) {
    acc.inputTokens = usage.input_tokens;
  }
  if (isTokenCount(usage.cache_creation_input_tokens)) {
    acc.cacheCreationInputTokens = usage.cache_creation_input_tokens;
  }
  if (isTokenCount(usage.cache_read_input_tokens)) {
    acc.cacheReadInputTokens = usage.cache_read_input_tokens;
  }
}

/* eslint-enable @typescript-eslint/no-explicit-any */

// ---------------------------------------------------------------------------
// Dispatch a parsed SSE event to the appropriate handler
// ---------------------------------------------------------------------------

/**
 * Log one SSE event and route its JSON payload to the matching processor.
 *
 * @param {object} acc - Accumulator to update.
 * @param {{ event: string, data: string }} event - Event from extractSSEEvents.
 */
function processEvent(acc, event) {
  acc.eventCount++;
  const now = Date.now();

  // For content_block_delta events, store only the event type to save memory.
  // For all other events, store the (size-capped) data string.
  // The last slot of the log is reserved for a single "truncated" entry.
  if (acc.events.length < MAX_EVENT_LOG_ENTRIES - 1) {
    const data =
      event.event === "content_block_delta"
        ? ""
        : truncateString(event.data, MAX_EVENT_DATA_BYTES);
    acc.events.push({ type: event.event, timestamp: now, data });
  } else if (!acc.eventLogTruncated) {
    acc.events.push({ type: "truncated", timestamp: now, data: TRUNCATION_MARKER });
    acc.eventLogTruncated = true;
  }

  // Skip JSON parsing for events with no data payload.
  if (!event.data) {
    return;
  }

  let parsed;
  try {
    parsed = JSON.parse(event.data);
  } catch {
    // Malformed JSON — telemetry is best-effort and the bytes were already
    // forwarded to the client, so there is nothing further to do here.
    return;
  }

  // Valid JSON that is not an object (e.g. `null`, a number) carries no
  // telemetry; dereferencing it in the handlers would throw.
  if (parsed === null || typeof parsed !== "object") {
    return;
  }

  switch (event.event) {
    case "message_start":
      processMessageStart(acc, parsed);
      break;
    case "content_block_start":
      processContentBlockStart(acc, parsed);
      break;
    case "content_block_delta":
      processContentBlockDelta(acc, parsed);
      break;
    case "message_delta":
      processMessageDelta(acc, parsed);
      break;
    // content_block_stop, message_stop, ping — no telemetry to extract
    default:
      break;
  }
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Create an SSE interceptor that extracts telemetry from an Anthropic
 * streaming response while passing all bytes through unmodified.
 *
 * ```ts
 * const { stream, telemetry } = createSSEInterceptor();
 * upstreamResponse.body
 *   .pipeThrough(stream)
 *   .pipeTo(clientWritable);
 *
 * const data = await telemetry;
 * console.log(data.usage.totalTokens);
 * ```
 *
 * @param {{ captureRawText?: boolean }} [options] - `captureRawText` (default
 *   false) additionally collects the decoded stream text, capped at 1 MB.
 * @returns {{ stream: { readable: ReadableStream, writable: WritableStream },
 *   telemetry: Promise<object> }} A `pipeThrough`-compatible pair and a
 *   promise that resolves exactly once with whatever telemetry was gathered,
 *   when the stream ends, is aborted, or its readable side is cancelled.
 */
export function createSSEInterceptor(options = {}) {
  const captureRawText = options.captureRawText ?? false;
  const acc = createAccumulator(captureRawText);
  const decoder = new TextDecoder();
  let sseBuffer = "";
  let resolved = false;

  let resolveTelemetry;
  const telemetryPromise = new Promise((resolve) => {
    resolveTelemetry = resolve;
  });

  /** Resolve the telemetry promise exactly once. */
  function settle() {
    if (resolved) {
      return;
    }
    resolved = true;
    resolveTelemetry(finalize(acc));
  }

  const transform = new TransformStream({
    transform(chunk, controller) {
      // Forward the raw bytes first so parsing never delays the client.
      controller.enqueue(chunk);

      // Track total bytes received for bandwidth metrics.
      acc.totalBytesReceived += chunk.byteLength;

      // Decode and buffer for SSE parsing.
      const decodedChunk = decoder.decode(chunk, { stream: true });
      appendRawTextChunk(acc, decodedChunk);
      sseBuffer += decodedChunk;

      const { events, remainder } = extractSSEEvents(sseBuffer);
      sseBuffer = remainder;
      for (const event of events) {
        processEvent(acc, event);
      }
    },
    flush() {
      // Drain any bytes the streaming decoder was still holding.
      const finalChunk = decoder.decode();
      if (finalChunk) {
        appendRawTextChunk(acc, finalChunk);
        sseBuffer += finalChunk;
      }

      // Process any trailing data left in the buffer (e.g. a final event
      // not followed by a blank line).
      if (sseBuffer.trim()) {
        const { events } = extractSSEEvents(`${sseBuffer}\n\n`);
        for (const event of events) {
          processEvent(acc, event);
        }
      }
      settle();
    },
  });

  // Wrap the writable side so we can intercept abort(), which does not run
  // the transformer's flush().
  const innerWriter = transform.writable.getWriter();

  // The inner writer's `closed` promise settles on every terminal state:
  // normal close, abort, a transform error, or the readable side being
  // cancelled by the consumer. Hooking it guarantees the telemetry promise
  // can never hang, whichever way the stream ends.
  innerWriter.closed.then(settle, settle);

  const writable = new WritableStream({
    write(chunk) {
      return innerWriter.write(chunk);
    },
    close() {
      return innerWriter.close();
    },
    abort(reason) {
      settle();
      return innerWriter.abort(reason);
    },
  });

  const stream = {
    readable: transform.readable,
    writable,
  };

  return { stream, telemetry: telemetryPromise };
}