UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

100 lines 4.52 kB
/** * Context Budget Checker * * Pre-generation validation that estimates total input token cost * and compares against the model's available input space. * * This runs BEFORE every LLM call to prevent context overflow. */ import { getAvailableInputTokens } from "../constants/contextWindows.js"; import { estimateMessagesTokens, estimateTokens, TOKENS_PER_MESSAGE, } from "../utils/tokenEstimation.js"; import { SpanSerializer, SpanType, SpanStatus, getMetricsAggregator, } from "../observability/index.js"; import { getActiveTraceContext } from "../telemetry/traceContext.js"; /** Default compaction threshold (80% of available input) */ const DEFAULT_COMPACTION_THRESHOLD = 0.8; /** Estimated tokens per tool definition */ const TOKENS_PER_TOOL_DEFINITION = 200; /** * Check whether a request fits within the model's context budget. * * Estimates total input tokens from: system prompt + tool definitions + * conversation history + current prompt + file attachments, and compares * against available input space. */ export function checkContextBudget(params) { const { traceId, parentSpanId } = getActiveTraceContext(); const span = SpanSerializer.createSpan(SpanType.CONTEXT_COMPACTION, "context.budgetCheck", { "context.operation": "budgetCheck", }, parentSpanId, traceId); const startTime = Date.now(); try { const { provider, model, maxTokens, systemPrompt, conversationMessages, currentPrompt, toolDefinitions, fileAttachments, compactionThreshold = DEFAULT_COMPACTION_THRESHOLD, } = params; const availableInputTokens = getAvailableInputTokens(provider, model, maxTokens); // Estimate each category const systemPromptTokens = systemPrompt ? estimateTokens(systemPrompt, provider) + TOKENS_PER_MESSAGE : 0; const conversationHistoryTokens = conversationMessages?.length ? estimateMessagesTokens(conversationMessages, provider) : 0; const currentPromptTokens = currentPrompt ? estimateTokens(currentPrompt, provider) + TOKENS_PER_MESSAGE : 0; const toolDefinitionTokens = toolDefinitions?.length ? toolDefinitions.reduce((sum, tool) => { try { const serialized = JSON.stringify(tool); return sum + estimateTokens(serialized, provider); } catch { return sum + TOKENS_PER_TOOL_DEFINITION; } }, 0) : 0; const fileAttachmentTokens = fileAttachments?.length ? fileAttachments.reduce((sum, file) => sum + estimateTokens(file.content, provider), 0) : 0; const estimatedInputTokens = systemPromptTokens + conversationHistoryTokens + currentPromptTokens + toolDefinitionTokens + fileAttachmentTokens; const usageRatio = availableInputTokens > 0 ? estimatedInputTokens / availableInputTokens : 1; const withinBudget = estimatedInputTokens <= availableInputTokens; const shouldCompact = usageRatio >= compactionThreshold; const result = { withinBudget, estimatedInputTokens, availableInputTokens, usageRatio, shouldCompact, breakdown: { systemPrompt: systemPromptTokens, conversationHistory: conversationHistoryTokens, currentPrompt: currentPromptTokens, toolDefinitions: toolDefinitionTokens, fileAttachments: fileAttachmentTokens, }, }; span.durationMs = Date.now() - startTime; const endedSpan = SpanSerializer.endSpan(SpanSerializer.updateAttributes(span, { "context.budgetUsage": usageRatio, "context.triggered": shouldCompact, "context.estimatedTokens": estimatedInputTokens, "context.availableTokens": availableInputTokens, }), SpanStatus.OK); getMetricsAggregator().recordSpan(endedSpan); return result; } catch (error) { span.durationMs = Date.now() - startTime; const endedSpan = SpanSerializer.endSpan(span, SpanStatus.ERROR); endedSpan.statusMessage = error instanceof Error ? error.message : String(error); getMetricsAggregator().recordSpan(endedSpan); throw error; } } //# sourceMappingURL=budgetChecker.js.map