revenium-middleware-litellm-node
Comprehensive middleware for Node.js applications using LiteLLM Proxy to automatically track LLM usage, costs, and performance metrics with Revenium
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.patchHttpClient = patchHttpClient;
exports.unpatchHttpClient = unpatchHttpClient;
exports.isHttpClientPatched = isHttpClientPatched;
exports.resetHttpClientManager = resetHttpClientManager;
const config_1 = require("./config");
const tracking_1 = require("./tracking");
const crypto_1 = require("crypto");
const validation_1 = require("./utils/validation");
const error_handling_1 = require("./utils/error-handling");
const constants_1 = require("./constants");
// Global logger
const logger = (0, config_1.getLogger)();
/**
* HTTP client manager singleton that tracks patch state and retains the original fetch
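*
* Illustrative sketch (patchHttpClient() below is the real call site):
* @example
* const manager = HttpClientManager.getInstance();
* manager.setPatched(true, globalThis.fetch);
* manager.isHttpClientPatched(); // => true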
*/
class HttpClientManager {
constructor() {
this.isPatched = false;
this.originalFetch = null;
// Private constructor to enforce singleton pattern
}
/**
* Get the singleton instance
*/
static getInstance() {
if (!HttpClientManager.instance) {
HttpClientManager.instance = new HttpClientManager();
}
return HttpClientManager.instance;
}
/**
* Reset the singleton instance (for testing)
*/
static resetInstance() {
HttpClientManager.instance = null;
}
/**
* Check if HTTP client is patched
*/
isHttpClientPatched() {
return this.isPatched;
}
/**
* Get the original fetch function
*/
getOriginalFetch() {
return this.originalFetch;
}
/**
* Set the patched state and store original fetch
*/
setPatched(patched, originalFetch) {
this.isPatched = patched;
if (originalFetch)
this.originalFetch = originalFetch;
}
/**
* Reset to unpatched state (for testing)
*/
reset() {
this.isPatched = false;
this.originalFetch = null;
}
}
HttpClientManager.instance = null;
/**
* Check whether a URL targets the configured LiteLLM Proxy (chat completions or embeddings endpoint)
*/
function isLiteLLMProxyRequest(url, config) {
try {
const requestUrl = new URL(url);
const proxyUrl = new URL(config.litellmProxyUrl);
// Check if the request is going to our configured LiteLLM Proxy
const isSameHost = requestUrl.hostname === proxyUrl.hostname;
// Normalize ports: an empty port means the protocol default (80 for http, 443 for https),
// so an explicit ':443' on either side still matches an implicit default on the other
const portOf = (u) => u.port || (u.protocol === 'https:' ? '443' : '80');
const isSamePort = portOf(requestUrl) === portOf(proxyUrl);
// Handle two cases:
// 1. Proxy URL is a base URL (e.g., http://localhost:4000) - check if request is to supported endpoint
// 2. Proxy URL is a full endpoint URL (e.g., http://localhost:4000/chat/completions) - check exact match
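// Illustrative outcomes (hypothetical values): with proxy 'http://localhost:4000',
// 'http://localhost:4000/v1/chat/completions' matches and 'http://localhost:4000/models' does not;
// with proxy 'http://localhost:4000/chat/completions', only that exact path matches.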
let isCorrectEndpoint = false;
if (constants_1.supportedEndpoints.some(endpoint => proxyUrl.pathname.endsWith(endpoint))) {
// Case 2: Proxy URL includes the endpoint path - check exact path match
isCorrectEndpoint = requestUrl.pathname === proxyUrl.pathname;
}
else {
// Case 1: Proxy URL is base URL - check if request is to any supported endpoint
isCorrectEndpoint = constants_1.supportedEndpoints.some(endpoint => requestUrl.pathname.endsWith(endpoint));
}
return isSameHost && isSamePort && isCorrectEndpoint;
}
catch (error) {
return false;
}
}
/**
* Create patched fetch function that intercepts LiteLLM Proxy requests
*/
function createPatchedFetch() {
return async function patchedFetch(input, init) {
const config = (0, config_1.getConfig)();
// Convert input to URL string for checking
const url = typeof input === 'string' ? input :
input instanceof URL ? input.toString() :
input.url;
// Only intercept LiteLLM Proxy requests if we have config
if (!config || !isLiteLLMProxyRequest(url, config)) {
const clientManager = HttpClientManager.getInstance();
const originalFetchFn = clientManager.getOriginalFetch();
if (!originalFetchFn)
throw new Error('Original fetch function not available');
return originalFetchFn(input, init);
}
// Validate the URL against our proxy configuration
const urlValidation = (0, validation_1.validateLiteLLMUrl)(url, config.litellmProxyUrl);
if (!urlValidation.isValid) {
logger.warn('Invalid LiteLLM proxy URL detected', {
url,
errors: urlValidation.errors,
configuredProxy: config.litellmProxyUrl
});
// Continue with original fetch for invalid URLs
const clientManager = HttpClientManager.getInstance();
const originalFetchFn = clientManager.getOriginalFetch();
if (!originalFetchFn)
throw new Error('Original fetch function not available');
return originalFetchFn(input, init);
}
// Extract and validate request context
const rawHeaders = init?.headers ? Object.fromEntries(new Headers(init.headers)) : {};
const validatedHeaders = (0, validation_1.validateHeaders)(rawHeaders);
const requestContext = {
url,
method: init?.method || 'GET',
headers: validatedHeaders,
body: init?.body || null,
startTime: Date.now(),
metadata: (0, tracking_1.extractMetadataFromHeaders)(validatedHeaders)
};
const requestId = (0, crypto_1.randomUUID)();
logger.debug('Intercepted LiteLLM Proxy request', {
url: requestContext.url,
method: requestContext.method,
requestId,
hasMetadata: !!requestContext.metadata
});
try {
// Add LiteLLM Proxy authentication if configured
const headers = new Headers(init?.headers);
if (config.litellmApiKey)
headers.set('Authorization', `Bearer ${config.litellmApiKey}`);
// Make the actual request
const clientManager = HttpClientManager.getInstance();
const originalFetchFn = clientManager.getOriginalFetch();
if (!originalFetchFn)
throw new Error('Original fetch function not available');
const response = await originalFetchFn(input, {
...init,
headers
});
const endTime = Date.now();
const duration = endTime - requestContext.startTime;
// Clone response to read body without consuming it
const responseClone = response.clone();
logger.debug('LiteLLM Proxy response received', {
status: response.status,
requestId,
duration
});
// Hand off successful POST responses (chat completions or embeddings) for fire-and-forget tracking
if (response.ok && requestContext.method === 'POST') {
handleSuccessfulResponse(requestContext, response, responseClone, requestId, duration);
}
else if (!response.ok) {
logger.warn('LiteLLM Proxy request failed', {
status: response.status,
statusText: response.statusText,
requestId
});
}
return response;
}
catch (error) {
const endTime = Date.now();
const duration = endTime - requestContext.startTime;
logger.error('LiteLLM Proxy request error', {
error: error instanceof Error ? error.message : String(error),
requestId,
duration
});
throw error;
}
};
}
/**
* Parse request body for either chat completions or embeddings
*/
async function parseRequestBody(requestContext, requestId, endpointType) {
if (!requestContext.body)
return null;
try {
const bodyText = typeof requestContext.body === 'string' ?
requestContext.body :
await new Response(requestContext.body).text();
return JSON.parse(bodyText);
}
catch (error) {
logger.warn(`Failed to parse ${endpointType} request body`, {
requestId,
error: error instanceof Error ? error.message : String(error)
});
return null;
}
}
/**
* Handle a successful chat completion or embeddings response and track usage
*/
async function handleSuccessfulResponse(requestContext, originalResponse, responseClone, requestId, duration) {
try {
// Determine endpoint type from URL
const url = new URL(requestContext.url);
// endsWith('/embeddings') also covers prefixed variants such as '/v1/embeddings'
const isEmbeddingsEndpoint = url.pathname.endsWith('/embeddings');
if (isEmbeddingsEndpoint) {
// Handle embeddings request
const requestData = await parseRequestBody(requestContext, requestId, 'embeddings');
const model = requestData?.model || 'unknown';
await handleEmbeddingResponse(responseClone, requestContext, requestId, duration, model);
}
else {
// Handle chat completions request
const requestData = await parseRequestBody(requestContext, requestId, 'chat');
const isStreaming = requestData?.stream === true;
const model = requestData?.model || 'unknown';
if (isStreaming) {
await handleStreamingResponse(responseClone, requestContext, requestId, duration, model);
}
else {
handleNonStreamingResponse(responseClone, requestContext, requestId, duration, model);
}
}
}
catch (error) {
logger.error('Error handling LiteLLM response', {
error: error instanceof Error ? error.message : String(error),
requestId
});
}
}
/**
* Handle non-streaming chat completion response
*/
async function handleNonStreamingResponse(response, requestContext, requestId, duration, model) {
try {
const responseData = await response.json();
const usage = (0, tracking_1.extractUsageFromResponse)(responseData);
logger.debug('Extracted usage from non-streaming response', {
requestId,
model,
...usage
});
// Track usage asynchronously
(0, tracking_1.trackUsageAsync)({
requestId,
model,
promptTokens: usage.promptTokens,
completionTokens: usage.completionTokens,
totalTokens: usage.totalTokens,
duration,
finishReason: usage.finishReason,
usageMetadata: requestContext.metadata,
isStreamed: false
});
}
catch (error) {
logger.error('Error processing non-streaming response', {
error: error instanceof Error ? error.message : String(error),
requestId
});
}
}
/**
* Handle streaming chat completion response
*/
async function handleStreamingResponse(response, requestContext, requestId, duration, model) {
logger.debug('Processing streaming response', { requestId, model });
if (!response.body) {
logger.warn('Streaming response has no body', { requestId });
// Track with minimal data if no body
(0, tracking_1.trackUsageAsync)({
requestId,
model,
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
duration,
finishReason: 'stop',
usageMetadata: requestContext.metadata,
isStreamed: true,
timeToFirstToken: duration
});
return;
}
try {
const streamParser = new StreamingResponseParser(requestId, model, requestContext, duration);
await streamParser.parseStream(response.body);
}
catch (error) {
logger.error('Error parsing streaming response', {
error: error instanceof Error ? error.message : String(error),
requestId
});
// Track with minimal data on error
(0, tracking_1.trackUsageAsync)({
requestId,
model,
promptTokens: 0,
completionTokens: 0,
totalTokens: 0,
duration,
finishReason: 'error',
usageMetadata: requestContext.metadata,
isStreamed: true,
timeToFirstToken: duration
});
}
}
/**
* Handle embeddings response and track usage
*/
async function handleEmbeddingResponse(response, requestContext, requestId, duration, model) {
try {
const responseData = await response.json();
// Guard against responses that omit the usage object
const usage = responseData.usage || { prompt_tokens: 0, total_tokens: 0 };
logger.debug('Extracted usage from embeddings response', {
requestId,
model,
promptTokens: usage.prompt_tokens,
totalTokens: usage.total_tokens
});
// Track embeddings usage asynchronously
(0, tracking_1.trackEmbeddingsUsageAsync)({
requestId,
model,
promptTokens: usage.prompt_tokens,
totalTokens: usage.total_tokens,
duration,
usageMetadata: requestContext.metadata
});
}
catch (error) {
logger.error('Error processing embeddings response', {
error: error instanceof Error ? error.message : String(error),
requestId
});
}
}
/**
* Streaming response parser for LiteLLM SSE streams
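*
* Parses SSE lines of the shape below (illustrative chunks, not captured output):
* @example
* data: {"choices":[{"delta":{"content":"Hello"}}]}
* data: {"choices":[{"delta":{},"finish_reason":"stop"}],"usage":{"prompt_tokens":9,"completion_tokens":2,"total_tokens":11}}
* data: [DONE]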
*/
class StreamingResponseParser {
constructor(requestId, model, requestContext, requestDuration) {
this.firstTokenTime = null;
this.promptTokens = 0;
this.completionTokens = 0;
this.totalTokens = 0;
this.finishReason = null;
this.logger = (0, config_1.getLogger)();
this.requestId = requestId;
this.model = model;
this.requestContext = requestContext;
this.requestDuration = requestDuration;
this.startTime = Date.now();
}
async parseStream(body) {
const reader = body.getReader();
const decoder = new TextDecoder();
let buffer = '';
try {
while (true) {
const { done, value } = await reader.read();
if (done)
break;
// Decode chunk and add to buffer
buffer += decoder.decode(value, { stream: true });
// Process complete SSE messages
const lines = buffer.split('\n');
buffer = lines.pop() || ''; // Keep incomplete line in buffer
for (const line of lines) {
this.processSSELine(line);
}
}
// Process any remaining buffer content
if (buffer.trim()) {
this.processSSELine(buffer);
}
}
finally {
reader.releaseLock();
this.finalizeTracking();
}
}
processSSELine(line) {
const trimmed = line.trim();
// Skip empty lines and comments
if (!trimmed || trimmed.startsWith(':'))
return;
// Parse SSE data lines
if (trimmed.startsWith('data: ')) {
const data = trimmed.slice(6); // Remove 'data: ' prefix
// Check for stream end marker
if (data === '[DONE]') {
this.logger.debug('Stream completed', { requestId: this.requestId });
return;
}
try {
const chunk = JSON.parse(data);
this.processStreamChunk(chunk);
}
catch (error) {
this.logger.debug('Failed to parse stream chunk', {
requestId: this.requestId,
data: data.substring(0, 100)
});
}
}
}
processStreamChunk(chunk) {
// Validate and sanitize chunk data
const validatedChunk = (0, validation_1.validateStreamChunk)(chunk);
if (!validatedChunk) {
this.logger.debug('Invalid stream chunk received', {
requestId: this.requestId,
chunkType: typeof chunk
});
return;
}
// Record first token time
if (this.firstTokenTime === null && validatedChunk.choices?.[0]?.delta?.content) {
this.firstTokenTime = Date.now();
this.logger.debug('First token received', {
requestId: this.requestId,
timeToFirstToken: this.firstTokenTime - this.startTime
});
}
// Extract usage information (typically in the last chunk)
if (validatedChunk.usage) {
this.promptTokens = validatedChunk.usage.prompt_tokens || 0;
this.completionTokens = validatedChunk.usage.completion_tokens || 0;
this.totalTokens = validatedChunk.usage.total_tokens || 0;
this.logger.debug('Usage data extracted from stream', {
requestId: this.requestId,
promptTokens: this.promptTokens,
completionTokens: this.completionTokens,
totalTokens: this.totalTokens
});
}
// Extract finish reason
if (validatedChunk.choices?.[0]?.finish_reason)
this.finishReason = validatedChunk.choices[0].finish_reason;
// Some providers send usage in different chunk structures
if (!this.totalTokens && validatedChunk.x_groq?.usage) {
// Groq-specific usage format
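// e.g. {"x_groq":{"usage":{"prompt_tokens":9,"completion_tokens":2,"total_tokens":11}}} (illustrative shape)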
this.promptTokens = validatedChunk.x_groq.usage.prompt_tokens || 0;
this.completionTokens = validatedChunk.x_groq.usage.completion_tokens || 0;
this.totalTokens = validatedChunk.x_groq.usage.total_tokens || 0;
}
}
finalizeTracking() {
const timeToFirstToken = this.firstTokenTime ?
(this.firstTokenTime - this.startTime) :
this.requestDuration;
this.logger.debug('Finalizing streaming response tracking', {
requestId: this.requestId,
model: this.model,
promptTokens: this.promptTokens,
completionTokens: this.completionTokens,
totalTokens: this.totalTokens,
finishReason: this.finishReason,
timeToFirstToken
});
// Track the complete streaming response
(0, tracking_1.trackUsageAsync)({
requestId: this.requestId,
model: this.model,
promptTokens: this.promptTokens,
completionTokens: this.completionTokens,
totalTokens: this.totalTokens,
duration: this.requestDuration,
finishReason: this.finishReason || 'stop',
usageMetadata: this.requestContext.metadata,
isStreamed: true,
timeToFirstToken
});
}
}
/**
* Patch the global fetch function to intercept LiteLLM Proxy requests
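*
* Minimal usage sketch (import path assumed; the package entry point may re-export these helpers):
* @example
* const { patchHttpClient, unpatchHttpClient } = require('revenium-middleware-litellm-node');
* if (patchHttpClient()) {
*     // Subsequent fetch calls to the configured LiteLLM Proxy are metered automatically
*     // ... application code ...
*     unpatchHttpClient();
* }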
*/
function patchHttpClient() {
const clientManager = HttpClientManager.getInstance();
if (clientManager.isHttpClientPatched()) {
logger.debug('HTTP client already patched');
return true;
}
if (typeof globalThis.fetch !== 'function') {
const errorContext = (0, error_handling_1.createErrorContext)()
.with('fetchType', typeof globalThis.fetch)
.build();
logger.error('Global fetch function not available', errorContext);
return false;
}
try {
// Store original fetch
const originalFetch = globalThis.fetch;
clientManager.setPatched(false, originalFetch);
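// The patched flag stays false until the replacement is installed, so a failure below leaves state consistent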
// Replace with patched version
globalThis.fetch = createPatchedFetch();
clientManager.setPatched(true);
logger.info('LiteLLM HTTP client middleware enabled');
return true;
}
catch (error) {
const errorContext = (0, error_handling_1.createErrorContext)()
.with('error', error instanceof Error ? error.message : String(error))
.with('stack', error instanceof Error ? error.stack : undefined)
.build();
logger.error('Failed to patch HTTP client', errorContext);
// Throw a proper error for better debugging
throw new error_handling_1.PatchingError('Failed to patch HTTP client for LiteLLM interception', errorContext);
}
}
/**
* Restore the original fetch function
*/
function unpatchHttpClient() {
const clientManager = HttpClientManager.getInstance();
if (!clientManager.isHttpClientPatched()) {
logger.debug('HTTP client not patched');
return true;
}
const originalFetch = clientManager.getOriginalFetch();
if (!originalFetch) {
logger.error('Original fetch function not stored');
return false;
}
try {
globalThis.fetch = originalFetch;
clientManager.setPatched(false);
logger.info('LiteLLM HTTP client middleware disabled');
return true;
}
catch (error) {
logger.error('Failed to unpatch HTTP client', {
error: error instanceof Error ? error.message : String(error)
});
return false;
}
}
/**
* Check if HTTP client is patched
*/
function isHttpClientPatched() {
return HttpClientManager.getInstance().isHttpClientPatched();
}
/**
* Reset HTTP client manager (for testing)
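*
* Test-teardown sketch (jest-style hooks, for illustration):
* @example
* afterEach(() => {
*     unpatchHttpClient();
*     resetHttpClientManager();
* });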
*/
function resetHttpClientManager() {
HttpClientManager.resetInstance();
}
//# sourceMappingURL=client.js.map