UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

89 lines (88 loc) 3.61 kB
/**
 * Emergency Content Truncation
 *
 * When message-level removal (sliding window) can't fit context into budget,
 * this truncates the CONTENT of the longest messages as a last resort.
 */
import { estimateTokens, estimateMessagesTokens, truncateToTokenBudget, } from "../utils/tokenEstimation.js";
import { logger } from "../utils/logger.js";

/** Messages whose content has fewer characters than this are never truncated. */
const MIN_TRUNCATABLE_CONTENT_LENGTH = 200;

/** A message is only truncated when its computed target is above this many tokens. */
const MIN_TARGET_TOKENS = 50;

/**
 * Extra fraction removed from each message on top of the strict proportional
 * reduction. NOTE(review): presumably headroom for token-estimation error.
 */
const TRUNCATION_SAFETY_MARGIN = 0.05;

/**
 * Coerce one budget-breakdown entry to a number, treating anything that
 * coerces to NaN (most importantly a missing/undefined field) as 0.
 *
 * Without this, a single missing field makes the whole budget NaN, and every
 * later comparison against the budget silently evaluates to false.
 *
 * @param {*} value - Raw breakdown entry.
 * @returns {number} The numeric value, or 0 when it is not a usable number.
 */
function tokenCountOrZero(value) {
    const count = Number(value);
    return Number.isNaN(count) ? 0 : count;
}

/**
 * Character length of a message's content for truncation purposes.
 * Only string content can be truncated here; any other content (null,
 * undefined, structured parts) reports 0 so the message is left untouched
 * instead of crashing on `.length`.
 *
 * @param {{ content?: * }} msg - Conversation message.
 * @returns {number} `content.length` for string content, otherwise 0.
 */
function truncatableLength(msg) {
    return typeof msg.content === "string" ? msg.content.length : 0;
}

/**
 * Emergency content truncation: truncate the content of the longest messages
 * to fit within the available token budget.
 *
 * Strategy: visit messages from longest to shortest content, truncating each
 * eligible one to a proportional share of its own size until enough tokens
 * have been saved. If the result still exceeds the budget, fall back to
 * keeping only the newest non-system messages that fit.
 *
 * @param {Array<{role: string, content: *, metadata?: object}>} messages -
 *   Conversation history, oldest first. Never mutated.
 * @param {number} availableTokensForHistory - Total token allowance from which
 *   the `breakdown` entries are subtracted.
 * @param {{systemPrompt?: number, currentPrompt?: number, toolDefinitions?: number, fileAttachments?: number}} breakdown -
 *   Token counts already reserved for other parts of the request. Missing
 *   entries count as 0.
 * @param {*} provider - Passed through unchanged to the token-estimation helpers.
 * @returns {Array<object>} One of:
 *   - `[]` when there is no positive (or no computable) history budget;
 *   - the original `messages` array itself when it already fits;
 *   - a new array in which truncated messages are replaced by copies carrying
 *     the shortened content and `metadata.truncated = true`;
 *   - a new array holding only the newest non-system messages that fit.
 */
export function emergencyContentTruncation(messages, availableTokensForHistory, breakdown, provider) {
    // Budget available for conversation history specifically
    const historyBudget = availableTokensForHistory -
        tokenCountOrZero(breakdown.systemPrompt) -
        tokenCountOrZero(breakdown.currentPrompt) -
        tokenCountOrZero(breakdown.toolDefinitions) -
        tokenCountOrZero(breakdown.fileAttachments);
    if (Number.isNaN(historyBudget) || historyBudget <= 0) {
        // No room for history (or the budget cannot be computed):
        // return empty to guarantee budget safety
        return [];
    }
    const currentHistoryTokens = estimateMessagesTokens(messages, provider);
    if (currentHistoryTokens <= historyBudget) {
        return messages; // Already fits
    }
    // Work on a shallow copy so the caller's array is never modified;
    // truncated entries are replaced with new objects below.
    const result = [...messages];
    const reductionNeeded = currentHistoryTokens - historyBudget;
    const reductionRatio = reductionNeeded / currentHistoryTokens;
    // Sort indices by content length descending (truncate biggest first)
    const sortedIndices = result
        .map((msg, idx) => ({ idx, len: truncatableLength(msg) }))
        .sort((a, b) => b.len - a.len);
    let tokensSaved = 0;
    for (const { idx, len } of sortedIndices) {
        if (tokensSaved >= reductionNeeded) {
            break;
        }
        const msg = result[idx];
        // Don't truncate system messages, very short messages, or messages
        // whose content is not a string (those report a length of 0)
        if (msg.role === "system" || len < MIN_TRUNCATABLE_CONTENT_LENGTH) {
            continue;
        }
        const msgTokens = estimateTokens(msg.content, provider);
        const targetTokens = Math.floor(msgTokens * (1 - reductionRatio - TRUNCATION_SAFETY_MARGIN));
        // Skip when the target would not shrink the message, or would shrink
        // it below the minimum size worth keeping
        if (targetTokens < msgTokens && targetTokens > MIN_TARGET_TOKENS) {
            const truncated = truncateToTokenBudget(msg.content, targetTokens, provider);
            if (truncated.truncated) {
                const savedThisMsg = msgTokens - estimateTokens(truncated.text, provider);
                tokensSaved += savedThisMsg;
                result[idx] = {
                    ...msg,
                    content: truncated.text,
                    metadata: { ...msg.metadata, truncated: true },
                };
            }
        }
    }
    logger.info("[EmergencyTruncation] Content truncation complete", {
        tokensSaved,
        reductionNeeded,
        // Replaced entries are new objects, so identity comparison counts them
        messagesModified: result.filter((m, i) => m !== messages[i]).length,
    });
    // Final safety check: guarantee returned history fits budget
    if (estimateMessagesTokens(result, provider) <= historyBudget) {
        return result;
    }
    // Hard fallback: keep newest non-system messages that fit.
    // Walks from newest to oldest and stops at the first message that
    // pushes the total over budget.
    const fallback = [];
    for (let i = result.length - 1; i >= 0; i--) {
        const msg = result[i];
        if (msg.role === "system") {
            continue;
        }
        fallback.unshift(msg);
        if (estimateMessagesTokens(fallback, provider) > historyBudget) {
            fallback.shift();
            break;
        }
    }
    return fallback;
}