/**
 * ContextManager - Manages conversation context to prevent token-limit overflows
*
* Responsibilities:
* - Truncate tool outputs intelligently
* - Prune old conversation history with LLM summarization
* - Track and estimate token usage
* - Keep conversation within budget based on model context windows
* - Proactively shrink context before hitting limits
*/
import { calculateContextThresholds } from './contextWindow.js';
/**
* Summarization prompt template
*/
export const SUMMARIZATION_PROMPT = `Create a compact but reliable summary of the earlier conversation.
Keep:
- Decisions, preferences, and open questions
- File paths, function/class names, APIs, and error messages with fixes
- What was completed vs. still pending (tests, TODOs)
Format:
## Key Context
- ...
## Work Completed
- ...
## Open Items
- ...
Conversation:
{conversation}`;
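/*
 * Illustrative sketch of how this template gets filled: the `{conversation}`
 * placeholder is replaced with formatted history, exactly as done by
 * createSummarizationCallback() below (`oldMessages` is a hypothetical array
 * of conversation messages):
 *
 *   const prompt = SUMMARIZATION_PROMPT.replace(
 *     '{conversation}',
 *     formatMessagesForSummary(oldMessages),
 *   );
 */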
/**
* Pre-defined AI Flow Patterns for intelligent context management
*/
export const DEFAULT_AI_FLOW_PATTERNS = [
{
patternId: 'read_edit_workflow',
description: 'Standard file modification workflow',
toolSequence: ['read', 'edit'],
contextImpact: 1500,
compactionOpportunity: true,
preservationPriority: 8,
},
{
patternId: 'analysis_phase',
description: 'Code analysis and exploration phase',
toolSequence: ['read', 'grep', 'glob', 'search'],
contextImpact: 3000,
compactionOpportunity: true,
preservationPriority: 6,
},
{
patternId: 'implementation_phase',
description: 'Active code implementation phase',
toolSequence: ['edit', 'write'],
contextImpact: 2000,
compactionOpportunity: false, // Preserve implementation context
preservationPriority: 9,
},
{
patternId: 'validation_phase',
description: 'Code validation and testing phase',
toolSequence: ['run_tests', 'run_build', 'run_repo_checks'],
contextImpact: 1000,
compactionOpportunity: true,
preservationPriority: 5,
},
{
patternId: 'parallel_execution',
description: 'Efficient parallel tool usage',
toolSequence: ['read', 'read', 'read'], // Multiple parallel reads
contextImpact: 2500,
compactionOpportunity: true,
preservationPriority: 7,
},
{
patternId: 'git_workflow',
description: 'Git operations workflow',
toolSequence: ['git_smart_commit', 'git_sync', 'git_create_pr'],
contextImpact: 1200,
compactionOpportunity: true,
preservationPriority: 6,
},
];
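/*
 * Minimal sketch of how a caller might match recent tool usage against these
 * patterns. The matching logic below is an assumption for illustration, not
 * part of this module; `recentToolNames` is a hypothetical string array:
 *
 *   const match = DEFAULT_AI_FLOW_PATTERNS.find(p =>
 *     p.toolSequence.every(tool => recentToolNames.includes(tool)));
 *   if (match?.compactionOpportunity) {
 *     // Candidate region for compaction, weighted by match.preservationPriority
 *   }
 */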
export class ContextManager {
config;
constructor(config = {}) {
this.config = {
maxTokens: 130000, // Leave room below 131072 limit
targetTokens: 100000, // Target to trigger pruning
maxToolOutputLength: 10000, // 10k chars max per tool output
preserveRecentMessages: 10, // Keep last 10 user/assistant exchanges
estimatedCharsPerToken: 4,
...config,
};
}
/**
* Truncate tool output intelligently
*/
truncateToolOutput(output, toolName) {
const originalLength = output.length;
if (originalLength <= this.config.maxToolOutputLength) {
return {
content: output,
wasTruncated: false,
originalLength,
truncatedLength: originalLength,
};
}
// Intelligent truncation based on tool type
const truncated = this.intelligentTruncate(output, toolName);
const truncatedLength = truncated.length;
return {
content: truncated,
wasTruncated: true,
originalLength,
truncatedLength,
};
}
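    /*
     * Example (illustrative), assuming `hugeLog` is a >10k-character string:
     *
     *   const manager = new ContextManager();
     *   const result = manager.truncateToolOutput(hugeLog, 'Bash');
     *   // result.wasTruncated === true, and result.content keeps mostly the
     *   // tail of the log (see truncateBashOutput below).
     */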
/**
* Intelligent truncation based on tool type
*/
intelligentTruncate(output, toolName) {
const maxLength = this.config.maxToolOutputLength;
// For file reads, show beginning and end
if (toolName === 'Read' || toolName === 'read_file') {
return this.truncateFileOutput(output, maxLength);
}
// For search results, keep first N results
if (toolName === 'Grep' || toolName === 'grep_search' || toolName === 'Glob') {
return this.truncateSearchOutput(output, maxLength);
}
// For bash/command output, keep end (usually most relevant)
if (toolName === 'Bash' || toolName === 'bash' || toolName === 'execute_bash') {
return this.truncateBashOutput(output, maxLength);
}
// Default: show beginning with truncation notice
return this.truncateDefault(output, maxLength);
}
truncateFileOutput(output, maxLength) {
        const lines = output.split('\n');
        // Keep a head slice and a tail slice, sized by the character budget
        const keepLines = Math.floor(maxLength / 100); // Assumes ~100 chars per line
        if (lines.length <= keepLines * 2) {
            // Too few lines to split head/tail without overlap; plain truncation
            return this.truncateDefault(output, maxLength);
        }
        const headLines = lines.slice(0, keepLines);
        const tailLines = lines.slice(-keepLines);
        const truncatedCount = lines.length - (keepLines * 2);
return [
...headLines,
`\n... [${truncatedCount} lines truncated for context management] ...\n`,
...tailLines,
].join('\n');
}
truncateSearchOutput(output, maxLength) {
const lines = output.split('\n');
const keepLines = Math.floor(maxLength / 80); // Rough average line length
if (lines.length <= keepLines) {
return output;
}
const truncatedCount = lines.length - keepLines;
return [
...lines.slice(0, keepLines),
`\n... [${truncatedCount} more results truncated for context management] ...`,
].join('\n');
}
truncateBashOutput(output, maxLength) {
if (output.length <= maxLength) {
return output;
}
// For command output, the end is usually most important (errors, final status)
const keepChars = Math.floor(maxLength * 0.8); // 80% at end
const prefixChars = maxLength - keepChars - 100; // Small prefix
const prefix = output.slice(0, prefixChars);
const suffix = output.slice(-keepChars);
const truncatedChars = output.length - prefixChars - keepChars;
return `${prefix}\n\n... [${truncatedChars} characters truncated for context management] ...\n\n${suffix}`;
}
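    /*
     * Worked example with the default maxToolOutputLength of 10000:
     * keepChars = floor(10000 * 0.8) = 8000 (tail) and
     * prefixChars = 10000 - 8000 - 100 = 1900 (head), leaving ~100 chars of
     * headroom for the truncation notice between them.
     */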
truncateDefault(output, maxLength) {
if (output.length <= maxLength) {
return output;
}
const truncatedChars = output.length - maxLength + 100; // Account for notice
return `${output.slice(0, maxLength - 100)}\n\n... [${truncatedChars} characters truncated for context management] ...`;
}
/**
* Estimate tokens in a message
*/
estimateTokens(message) {
let charCount = 0;
if (message.content) {
charCount += message.content.length;
}
if (message.role === 'assistant' && message.toolCalls) {
// Tool calls add overhead
for (const call of message.toolCalls) {
charCount += call.name.length;
charCount += JSON.stringify(call.arguments).length;
}
}
return Math.ceil(charCount / this.config.estimatedCharsPerToken);
}
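    /*
     * Worked example: a user message with 400 characters of content estimates
     * to Math.ceil(400 / 4) = 100 tokens under the default
     * estimatedCharsPerToken of 4.
     */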
/**
* Detect context overflow risk from recent tool usage patterns
*/
detectContextOverflowRisk(toolCalls) {
const recentTools = toolCalls.slice(-10); // Last 10 tools
// Check for broad search patterns without limits
const broadSearches = recentTools.filter(tool => tool.includes('Glob') && !tool.includes('head_limit'));
// Check for multiple large file reads
const fileReads = recentTools.filter(tool => tool.includes('Read') || tool.includes('read_file'));
// Check for redundant context_snapshot calls
const contextSnapshots = recentTools.filter(tool => tool.includes('context_snapshot'));
// Risk threshold: 2+ broad searches OR 5+ file reads OR 1+ context_snapshot
return broadSearches.length >= 2 ||
fileReads.length >= 5 ||
contextSnapshots.length >= 1;
}
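    /*
     * Example (illustrative): a burst of unscoped file reads trips the
     * heuristic.
     *
     *   manager.detectContextOverflowRisk([
     *     'Read src/a.ts', 'Read src/b.ts', 'Read src/c.ts',
     *     'Read src/d.ts', 'Read src/e.ts',
     *   ]); // => true (5+ file reads among the last 10 tool calls)
     */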
/**
* Estimate total tokens in conversation
*/
estimateTotalTokens(messages) {
return messages.reduce((sum, msg) => sum + this.estimateTokens(msg), 0);
}
    /**
     * Prune old messages when approaching limit
     *
     * Synchronously removes old messages to stay within budget. For
     * LLM-assisted summarization instead of plain removal, use the async
     * pruneMessagesWithSummary() method.
     */
pruneMessages(messages) {
const totalTokens = this.estimateTotalTokens(messages);
// Only prune if we're above target
if (totalTokens < this.config.targetTokens) {
return { pruned: messages, removed: 0 };
}
// Always keep system message (first)
const firstMessage = messages[0];
const systemMessage = firstMessage?.role === 'system' ? firstMessage : null;
const conversationMessages = systemMessage ? messages.slice(1) : messages;
// Group messages into "turns" to maintain tool call/result pairing
// A turn is: [user] or [assistant + all its tool results]
const turns = [];
let currentTurn = [];
for (const msg of conversationMessages) {
if (msg.role === 'user') {
if (currentTurn.length > 0) {
turns.push(currentTurn);
}
currentTurn = [msg];
}
else if (msg.role === 'assistant') {
if (currentTurn.length > 0) {
turns.push(currentTurn);
}
currentTurn = [msg];
}
else if (msg.role === 'tool') {
// Tool results belong with the current assistant turn
currentTurn.push(msg);
}
}
if (currentTurn.length > 0) {
turns.push(currentTurn);
}
// Keep recent turns based on preserveRecentMessages (count user turns)
const recentTurns = [];
let exchangeCount = 0;
for (let i = turns.length - 1; i >= 0; i--) {
const turn = turns[i];
if (!turn || turn.length === 0)
continue;
recentTurns.unshift(turn);
// Count user messages as exchanges
if (turn[0]?.role === 'user') {
exchangeCount++;
if (exchangeCount >= this.config.preserveRecentMessages) {
break;
}
}
}
// IMPORTANT: Ensure we don't start with orphaned tool messages
// The first kept turn must start with user or assistant (not tool)
let startIndex = 0;
while (startIndex < recentTurns.length) {
const firstTurn = recentTurns[startIndex];
if (firstTurn && firstTurn.length > 0 && firstTurn[0]?.role === 'tool') {
startIndex++;
continue;
}
// Also check for assistant turns with missing tool results
if (firstTurn && firstTurn[0]?.role === 'assistant') {
const assistantMsg = firstTurn[0];
if (assistantMsg.toolCalls && assistantMsg.toolCalls.length > 0) {
// PERF: Pre-compute tool call IDs once, use direct Set lookup
const toolCallIds = assistantMsg.toolCalls.map(tc => tc.id);
const presentToolResultIds = new Set(firstTurn.filter(m => m.role === 'tool').map(m => m.toolCallId));
// If NOT all tool calls have results, skip this turn
// PERF: Direct has() calls instead of spread + every()
let allPresent = true;
for (const id of toolCallIds) {
if (!presentToolResultIds.has(id)) {
allPresent = false;
break;
}
}
if (!allPresent) {
startIndex++;
continue;
}
}
}
break;
}
const validTurns = recentTurns.slice(startIndex);
// Flatten turns back to messages
const recentMessages = validTurns.flat();
// Build pruned message list
const pruned = [];
if (systemMessage) {
pruned.push(systemMessage);
}
// Add a context summary message if we removed messages
const removedCount = conversationMessages.length - recentMessages.length;
if (removedCount > 0) {
pruned.push({
role: 'system',
content: `[Context Manager: Removed ${removedCount} old messages to stay within token budget. Recent conversation history preserved.]`,
});
}
pruned.push(...recentMessages);
return {
pruned,
removed: removedCount,
};
}
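    /*
     * Usage sketch (`messages` is a hypothetical array of
     * { role, content, toolCalls?, toolCallId? } objects):
     *
     *   const { pruned, removed } = manager.pruneMessages(messages);
     *   // `pruned` keeps the system prompt, a pruning notice, and the most
     *   // recent user turns with their tool results intact.
     */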
/**
* Prune messages with LLM-based summarization
*
* This is an async version that uses the LLM to create intelligent summaries
* instead of just removing old messages. Should be called BEFORE generation.
*/
async pruneMessagesWithSummary(messages, options) {
const totalTokens = this.estimateTotalTokens(messages);
// Only prune if we're above target
if (!options?.force && totalTokens < this.config.targetTokens) {
return { pruned: messages, removed: 0, summarized: false };
}
// If no summarization callback or disabled, fall back to simple pruning
if (!this.config.summarizationCallback || !this.config.useLLMSummarization) {
const result = this.pruneMessages(messages);
return { ...result, summarized: false };
}
// Partition messages
const firstMessage = messages[0];
const systemMessage = firstMessage?.role === 'system' ? firstMessage : null;
const conversationMessages = systemMessage ? messages.slice(1) : messages;
// Group messages into "turns" to maintain tool call/result pairing
const turns = [];
let currentTurn = [];
for (const msg of conversationMessages) {
if (msg.role === 'user') {
if (currentTurn.length > 0) {
turns.push(currentTurn);
}
currentTurn = [msg];
}
else if (msg.role === 'assistant') {
if (currentTurn.length > 0) {
turns.push(currentTurn);
}
currentTurn = [msg];
}
else if (msg.role === 'tool') {
currentTurn.push(msg);
}
}
if (currentTurn.length > 0) {
turns.push(currentTurn);
}
// Keep recent turns based on preserveRecentMessages
const recentTurns = [];
let exchangeCount = 0;
for (let i = turns.length - 1; i >= 0; i--) {
const turn = turns[i];
if (!turn || turn.length === 0)
continue;
recentTurns.unshift(turn);
if (turn[0]?.role === 'user') {
exchangeCount++;
if (exchangeCount >= this.config.preserveRecentMessages) {
break;
}
}
}
// Ensure we don't start with orphaned tool messages
let startIndex = 0;
while (startIndex < recentTurns.length) {
const firstTurn = recentTurns[startIndex];
if (firstTurn && firstTurn.length > 0 && firstTurn[0]?.role === 'tool') {
startIndex++;
continue;
}
if (firstTurn && firstTurn[0]?.role === 'assistant') {
const assistantMsg = firstTurn[0];
if (assistantMsg.toolCalls && assistantMsg.toolCalls.length > 0) {
// PERF: Pre-compute tool call IDs once, use direct Set lookup
const toolCallIds = assistantMsg.toolCalls.map(tc => tc.id);
const presentToolResultIds = new Set(firstTurn.filter(m => m.role === 'tool').map(m => m.toolCallId));
// PERF: Direct has() calls instead of spread + every()
let allPresent = true;
for (const id of toolCallIds) {
if (!presentToolResultIds.has(id)) {
allPresent = false;
break;
}
}
if (!allPresent) {
startIndex++;
continue;
}
}
}
break;
}
const validTurns = recentTurns.slice(startIndex);
const recentMessages = validTurns.flat();
        // Determine which turns to summarize: everything before the kept turns,
        // including any orphaned turns skipped above (safe, since they are only
        // fed to the summarizer as text, never replayed as structured messages)
        const keepTurnCount = validTurns.length;
        const summarizeTurns = turns.slice(0, turns.length - keepTurnCount);
const toSummarize = summarizeTurns.flat();
// If nothing to summarize, return as-is
if (toSummarize.length === 0) {
return { pruned: messages, removed: 0, summarized: false };
}
try {
// Call the LLM to summarize old messages
const summary = await this.config.summarizationCallback(toSummarize);
// Build pruned message list with summary
const pruned = [];
if (systemMessage) {
pruned.push(systemMessage);
}
// Add intelligent summary
pruned.push({
role: 'system',
content: [
'=== Context Summary (Auto-generated) ===',
summary.trim(),
'',
`[Summarized ${toSummarize.length} earlier messages. Recent ${recentMessages.length} messages preserved below.]`,
].join('\n'),
});
pruned.push(...recentMessages);
return {
pruned,
removed: toSummarize.length,
summarized: true,
};
}
catch (error) {
// If summarization fails, fall back to simple pruning
const result = this.pruneMessages(messages);
return { ...result, summarized: false };
}
}
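    /*
     * Usage sketch, assuming the manager was configured with
     * useLLMSummarization: true and a summarizationCallback (see
     * createSummarizationCallback() at the bottom of this file):
     *
     *   const result = await manager.pruneMessagesWithSummary(messages);
     *   if (result.summarized) {
     *     // Old turns were replaced by an LLM-written "Context Summary" message
     *   }
     */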
/**
* Check if we're approaching the limit
*/
isApproachingLimit(messages) {
const totalTokens = this.estimateTotalTokens(messages);
return totalTokens >= this.config.targetTokens;
}
    /**
     * Get warning level for current context usage
     * Returns: null (<50%), 'info' (50-70%), 'warning' (70-90%), 'danger' (>90%)
     */
getWarningLevel(messages) {
const totalTokens = this.estimateTotalTokens(messages);
const percentage = (totalTokens / this.config.maxTokens) * 100;
if (percentage > 90) {
return 'danger';
}
else if (percentage > 70) {
return 'warning';
}
else if (percentage > 50) {
return 'info';
}
return null;
}
/**
* Get a human-readable warning message
*/
getWarningMessage(messages) {
const stats = this.getStats(messages);
const warningLevel = this.getWarningLevel(messages);
if (warningLevel === 'danger') {
            return `⚠️ Context usage critical (${stats.percentage}%). Consider starting a new session; otherwise the next request may fail.`;
}
else if (warningLevel === 'warning') {
return `Context usage high (${stats.percentage}%). Automatic cleanup will occur soon.`;
}
return null;
}
/**
* Get context stats
*/
getStats(messages) {
const totalTokens = this.estimateTotalTokens(messages);
const percentage = Math.round((totalTokens / this.config.maxTokens) * 100);
return {
totalTokens,
percentage,
isOverLimit: totalTokens >= this.config.maxTokens,
isApproachingLimit: totalTokens >= this.config.targetTokens,
};
}
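    /*
     * Example (illustrative), with the default maxTokens of 130000 and
     * targetTokens of 100000:
     *
     *   manager.getStats(messages);
     *   // => { totalTokens: 91000, percentage: 70,
     *   //      isOverLimit: false, isApproachingLimit: false }
     */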
/**
* Update configuration
*/
updateConfig(config) {
this.config = { ...this.config, ...config };
}
// ============================================================================
// INTELLIGENT COMPACTION SYSTEM
// Automatically detects optimal points for conversation compaction
// ============================================================================
/**
* Default patterns that indicate task boundaries
*/
static DEFAULT_TASK_BOUNDARY_PATTERNS = [
// Completion indicators
/\b(done|completed|finished|fixed|resolved|implemented|added|created|updated)\b/i,
/\b(all\s+(?:tests?\s+)?pass(?:ing|ed)?)\b/i,
/\b(successfully|works?\s+(?:now|correctly))\b/i,
// Transition indicators
/\b(next|now\s+(?:let's|we\s+can)|moving\s+on)\b/i,
/\b(that's\s+(?:it|all|done))\b/i,
// Acknowledgment patterns
/^(?:great|perfect|thanks|thank\s+you|got\s+it|understood)\b/i,
];
/**
* Patterns indicating topic/task shifts
*/
static TOPIC_SHIFT_PATTERNS = [
/\b(different|another|new|separate|unrelated)\s+(?:task|thing|topic|issue|question)\b/i,
/\b(can\s+you|could\s+you|please|now|let's)\s+(?:also|help|do|make|create|fix|add)\b/i,
/\b(switching|changing|moving)\s+to\b/i,
/\b(forget|ignore|never\s*mind)\s+(?:that|the|about)\b/i,
/^(?:ok|okay|alright|anyway|so)\s*[,.]?\s*(?:can|could|now|let|please)/i,
];
/**
* Patterns indicating user pivots (abandoning current direction)
*/
static USER_PIVOT_PATTERNS = [
/\b(actually|wait|hold\s+on|stop|cancel|scratch\s+that)\b/i,
/\b(let's\s+(?:try|do)\s+(?:something|it)\s+(?:else|differently))\b/i,
/\b(go\s+back|revert|undo|start\s+over)\b/i,
/\b(wrong|not\s+(?:what|right)|that's\s+not)\b/i,
];
/**
* Analyze the conversation to detect intelligent compaction points
*/
analyzeCompactionPoints(messages) {
const signals = [];
const totalTokens = this.estimateTotalTokens(messages);
const tokenPercentage = totalTokens / this.config.maxTokens;
const compactionThreshold = this.config.compactionThreshold ?? 0.5;
const minConfidence = this.config.minSignalConfidence ?? 0.6;
// Don't analyze if below threshold
if (tokenPercentage < compactionThreshold) {
return {
shouldCompact: false,
signals: [],
recommendedCompactionPoint: null,
urgency: 'none',
preserveFromIndex: 0,
};
}
// Analyze each message for compaction signals
for (let i = 0; i < messages.length; i++) {
const msg = messages[i];
if (!msg)
continue;
// Detect task boundaries
const taskBoundary = this.detectTaskBoundary(msg, i, messages);
if (taskBoundary && taskBoundary.confidence >= minConfidence) {
signals.push(taskBoundary);
}
// Detect topic shifts
const topicShift = this.detectTopicShift(msg, i, messages);
if (topicShift && topicShift.confidence >= minConfidence) {
signals.push(topicShift);
}
// Detect user pivots
const userPivot = this.detectUserPivot(msg, i);
if (userPivot && userPivot.confidence >= minConfidence) {
signals.push(userPivot);
}
// Detect context saturation (tool output heavy regions)
const saturation = this.detectContextSaturation(msg, i, messages);
if (saturation && saturation.confidence >= minConfidence) {
signals.push(saturation);
}
// Detect milestones
const milestone = this.detectMilestone(msg, i, messages);
if (milestone && milestone.confidence >= minConfidence) {
signals.push(milestone);
}
}
// Determine urgency based on token percentage
const urgency = this.calculateUrgency(tokenPercentage);
// Find the best compaction point
const recommendedPoint = this.findBestCompactionPoint(signals, messages, urgency);
// Calculate preserve index (everything after this should be kept)
const preserveFromIndex = recommendedPoint !== null
? this.findSafePreservePoint(recommendedPoint, messages)
: messages.length;
return {
shouldCompact: signals.length > 0 && urgency !== 'none',
signals,
recommendedCompactionPoint: recommendedPoint,
urgency,
preserveFromIndex,
};
}
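    /*
     * Illustrative result shape once usage crosses compactionThreshold (all
     * values below are hypothetical):
     *
     *   {
     *     shouldCompact: true,
     *     signals: [{ type: 'task_boundary', confidence: 0.8,
     *                 messageIndex: 42, reason: 'Task completion detected: ...' }],
     *     recommendedCompactionPoint: 42,
     *     urgency: 'medium',
     *     preserveFromIndex: 43,
     *   }
     */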
/**
* Detect task boundary signals
*/
detectTaskBoundary(msg, index, messages) {
if (msg.role !== 'user' && msg.role !== 'assistant')
return null;
const content = msg.content || '';
const patterns = this.config.taskBoundaryPatterns
? this.config.taskBoundaryPatterns.map(p => new RegExp(p, 'i'))
: ContextManager.DEFAULT_TASK_BOUNDARY_PATTERNS;
let matchCount = 0;
const reasons = [];
for (const pattern of patterns) {
if (pattern.test(content)) {
matchCount++;
reasons.push(pattern.source.slice(0, 30));
}
}
if (matchCount === 0)
return null;
        // Base confidence scales with the number of matched patterns
        let confidence = Math.min(0.4 + matchCount * 0.2, 0.9);
// Boost confidence if this looks like a conclusion
if (msg.role === 'assistant' && this.looksLikeConclusion(content)) {
confidence = Math.min(confidence + 0.2, 0.95);
}
// Boost if next user message starts a new topic
const nextUserMsg = messages.slice(index + 1).find(m => m.role === 'user');
if (nextUserMsg && this.isNewTopic(content, nextUserMsg.content || '')) {
confidence = Math.min(confidence + 0.15, 0.95);
}
return {
type: 'task_boundary',
confidence,
messageIndex: index,
reason: `Task completion detected: ${reasons.slice(0, 2).join(', ')}`,
};
}
/**
* Detect topic shift signals
*/
detectTopicShift(msg, index, messages) {
if (msg.role !== 'user')
return null;
const content = msg.content || '';
const sensitivity = this.config.topicShiftSensitivity ?? 0.7;
// Check explicit shift patterns
for (const pattern of ContextManager.TOPIC_SHIFT_PATTERNS) {
if (pattern.test(content)) {
return {
type: 'topic_shift',
confidence: 0.7 + sensitivity * 0.2,
messageIndex: index,
reason: 'Explicit topic shift language detected',
};
}
}
// Check semantic shift from previous context
const prevMessages = messages.slice(Math.max(0, index - 5), index);
const prevContent = prevMessages
.filter(m => m.role === 'user' || m.role === 'assistant')
.map(m => m.content || '')
.join(' ');
if (prevContent && this.isNewTopic(prevContent, content)) {
return {
type: 'topic_shift',
confidence: 0.6 + sensitivity * 0.2,
messageIndex: index,
reason: 'Semantic topic shift detected',
};
}
return null;
}
/**
* Detect user pivot signals (abandoning current direction)
*/
detectUserPivot(msg, index) {
if (msg.role !== 'user')
return null;
const content = msg.content || '';
for (const pattern of ContextManager.USER_PIVOT_PATTERNS) {
if (pattern.test(content)) {
return {
type: 'user_pivot',
confidence: 0.85,
messageIndex: index,
reason: 'User pivot/direction change detected',
};
}
}
return null;
}
/**
* Detect context saturation (heavy tool output regions)
*/
detectContextSaturation(msg, index, messages) {
if (msg.role !== 'tool')
return null;
// Look at the surrounding region
const windowStart = Math.max(0, index - 10);
const windowEnd = Math.min(messages.length, index + 5);
const window = messages.slice(windowStart, windowEnd);
// Count tool messages and their sizes
let toolCount = 0;
let totalToolSize = 0;
for (const m of window) {
if (m.role === 'tool') {
toolCount++;
totalToolSize += (m.content || '').length;
}
}
// High saturation if many tool outputs with large content
if (toolCount >= 5 && totalToolSize > 20000) {
// Find the last tool message in this cluster as compaction point
let lastToolIndex = index;
for (let i = index + 1; i < windowEnd; i++) {
if (messages[i]?.role === 'tool') {
lastToolIndex = i;
}
else if (messages[i]?.role === 'user') {
break; // Stop at next user message
}
}
return {
type: 'context_saturation',
confidence: Math.min(0.5 + toolCount * 0.05, 0.85),
messageIndex: lastToolIndex,
reason: `Heavy tool output region (${toolCount} tools, ${Math.round(totalToolSize / 1000)}k chars)`,
};
}
return null;
}
/**
* Detect milestone signals (significant accomplishments)
*/
detectMilestone(msg, index, _messages) {
if (msg.role !== 'assistant')
return null;
const content = msg.content || '';
// Look for milestone indicators
const milestonePatterns = [
/\b(commit(?:ted)?|pushed|deployed|merged|released)\b/i,
/\b(all\s+tests?\s+pass(?:ing|ed)?)\b/i,
/\b(build\s+(?:succeed|success|pass))\b/i,
/\b(feature\s+(?:complete|done|ready))\b/i,
/\b(pr\s+(?:created|opened|merged))\b/i,
];
for (const pattern of milestonePatterns) {
if (pattern.test(content)) {
return {
type: 'milestone',
confidence: 0.9,
messageIndex: index,
reason: 'Significant milestone achieved',
};
}
}
return null;
}
/**
* Check if content looks like a task conclusion
*/
looksLikeConclusion(content) {
const conclusionPatterns = [
/\b(let\s+me\s+know|feel\s+free|if\s+you\s+(?:need|have|want))\b/i,
/\b(anything\s+else|other\s+questions?)\b/i,
/\b(should\s+be\s+(?:good|working|ready|done))\b/i,
/\b(that\s+should|this\s+(?:should|will))\s+(?:fix|solve|work)/i,
];
return conclusionPatterns.some(p => p.test(content));
}
/**
* Check if two contents represent different topics (simple heuristic)
*/
isNewTopic(prevContent, newContent) {
// Extract key terms (simple tokenization)
const extractTerms = (text) => {
const words = text.toLowerCase()
.replace(/[^a-z0-9\s]/g, ' ')
.split(/\s+/)
.filter(w => w.length > 3);
return new Set(words);
};
const prevTerms = extractTerms(prevContent);
const newTerms = extractTerms(newContent);
if (prevTerms.size === 0 || newTerms.size === 0)
return false;
// Calculate overlap
let overlap = 0;
for (const term of newTerms) {
if (prevTerms.has(term))
overlap++;
}
const overlapRatio = overlap / Math.min(prevTerms.size, newTerms.size);
// Low overlap suggests new topic
return overlapRatio < 0.2;
}
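    /*
     * Worked example: "refactor the parser tests" vs. "update deployment
     * config" share no terms longer than 3 characters, so overlap = 0,
     * overlapRatio = 0 < 0.2, and the pair counts as a new topic.
     */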
/**
* Calculate urgency level based on token percentage
*/
calculateUrgency(tokenPercentage) {
if (tokenPercentage >= 0.9)
return 'critical';
if (tokenPercentage >= 0.75)
return 'high';
if (tokenPercentage >= 0.6)
return 'medium';
if (tokenPercentage >= 0.5)
return 'low';
return 'none';
}
/**
* Find the best compaction point from signals
*/
findBestCompactionPoint(signals, messages, urgency) {
if (signals.length === 0)
return null;
// Score each signal based on type priority and confidence
const typePriority = {
milestone: 1.0,
task_boundary: 0.9,
user_pivot: 0.85,
            ai_flow_pattern: 0.82, // Reserved for AI flow-pattern signals (none emitted by the built-in detectors)
topic_shift: 0.8,
context_saturation: 0.7,
};
// Urgency affects how far back we're willing to compact
const urgencyDepth = {
none: 0,
low: 0.3, // Compact only recent 30%
medium: 0.5,
high: 0.7,
critical: 0.9,
};
const maxDepth = urgencyDepth[urgency] ?? 0.5;
const minIndex = Math.floor(messages.length * (1 - maxDepth));
// Find highest scoring signal within allowed depth
let bestSignal = null;
let bestScore = 0;
for (const signal of signals) {
if (signal.messageIndex < minIndex)
continue;
const score = signal.confidence * typePriority[signal.type];
if (score > bestScore) {
bestScore = score;
bestSignal = signal;
}
}
return bestSignal?.messageIndex ?? null;
}
/**
* Find a safe preservation point that doesn't break tool call chains
*/
findSafePreservePoint(compactionPoint, messages) {
// Start from compaction point and move forward to find a safe break
for (let i = compactionPoint + 1; i < messages.length; i++) {
const msg = messages[i];
if (!msg)
continue;
// Safe if it's a user message
if (msg.role === 'user') {
return i;
}
// Safe if it's an assistant without pending tool calls
if (msg.role === 'assistant' && !msg.toolCalls?.length) {
return i;
}
}
        // No safe break found: preserve everything after the compaction point
        return Math.min(compactionPoint + 1, messages.length);
}
/**
* Perform intelligent compaction based on analysis
* This method analyzes the conversation and compacts at the optimal point
*/
async intelligentCompact(messages) {
// Analyze for compaction points
const analysis = this.analyzeCompactionPoints(messages);
// If no compaction needed or no good point found
if (!analysis.shouldCompact || analysis.recommendedCompactionPoint === null) {
return {
compacted: messages,
analysis,
summarized: false,
};
}
// Separate messages to summarize and preserve
const firstMessage = messages[0];
const systemMessage = firstMessage?.role === 'system' ? firstMessage : null;
const startIndex = systemMessage ? 1 : 0;
const toSummarize = messages.slice(startIndex, analysis.preserveFromIndex);
const toPreserve = messages.slice(analysis.preserveFromIndex);
// If nothing to summarize, return as-is
if (toSummarize.length === 0) {
return {
compacted: messages,
analysis,
summarized: false,
};
}
// Build result
const compacted = [];
if (systemMessage) {
compacted.push(systemMessage);
}
// Try LLM summarization if available
if (this.config.summarizationCallback && this.config.useLLMSummarization !== false) {
try {
const summary = await this.config.summarizationCallback(toSummarize);
compacted.push({
role: 'system',
content: [
'=== Intelligent Context Summary ===',
`Compaction triggered: ${analysis.signals[0]?.reason || 'Context optimization'}`,
'',
summary.trim(),
'',
`[Summarized ${toSummarize.length} messages. ${toPreserve.length} recent messages preserved.]`,
].join('\n'),
});
compacted.push(...toPreserve);
return {
compacted,
analysis,
summarized: true,
};
}
catch {
// Fall through to simple compaction
}
}
// Simple compaction without LLM
compacted.push({
role: 'system',
content: `[Context Manager: Intelligently compacted ${toSummarize.length} messages at "${analysis.signals[0]?.reason || 'optimal point'}". ${toPreserve.length} recent messages preserved.]`,
});
compacted.push(...toPreserve);
return {
compacted,
analysis,
summarized: false,
};
}
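    /*
     * Usage sketch (typically run before each generation request):
     *
     *   const { compacted, analysis, summarized } =
     *     await manager.intelligentCompact(messages);
     *   if (analysis.shouldCompact) {
     *     messages = compacted; // continue with the smaller history
     *   }
     */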
/**
* Check if intelligent compaction should be triggered
* Call this before generation to proactively manage context
*/
shouldTriggerCompaction(messages) {
if (this.config.enableIntelligentCompaction === false) {
return { shouldCompact: false, urgency: 'none', reason: null };
}
const analysis = this.analyzeCompactionPoints(messages);
if (!analysis.shouldCompact) {
return { shouldCompact: false, urgency: analysis.urgency, reason: null };
}
const topSignal = analysis.signals
.sort((a, b) => b.confidence - a.confidence)[0];
return {
shouldCompact: true,
urgency: analysis.urgency,
reason: topSignal?.reason || 'Context optimization recommended',
};
}
}
/**
* Create a default context manager instance with model-aware limits
*/
export function createDefaultContextManager(overrides, model) {
// Get model-specific thresholds
const thresholds = calculateContextThresholds(model);
return new ContextManager({
maxTokens: thresholds.maxTokens,
targetTokens: thresholds.targetTokens, // Start pruning at 60%
warningTokens: thresholds.warningTokens, // Warn at 50%
criticalTokens: thresholds.criticalTokens, // Critical at 75%
maxToolOutputLength: 5000, // 5k chars max per tool (reduced for safety)
preserveRecentMessages: 5, // Keep last 5 exchanges
estimatedCharsPerToken: 3.5, // More aggressive estimate (accounts for special tokens, JSON overhead)
useLLMSummarization: true, // Enable LLM summarization by default
// Intelligent compaction defaults
enableIntelligentCompaction: true,
compactionThreshold: 0.5, // Start analyzing at 50% context usage
minSignalConfidence: 0.6, // Require 60% confidence for compaction signals
topicShiftSensitivity: 0.7, // Moderately sensitive to topic changes
model,
...overrides,
});
}
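/*
 * Usage sketch (the model id below is a hypothetical example):
 *
 *   const manager = createDefaultContextManager(
 *     { preserveRecentMessages: 8 },
 *     'gpt-4o',
 *   );
 */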
/**
* Format conversation messages into readable text for summarization
*/
export function formatMessagesForSummary(messages) {
const lines = [];
for (const msg of messages) {
if (msg.role === 'user') {
lines.push(`USER: ${msg.content}`);
}
else if (msg.role === 'assistant') {
let content = msg.content || '';
if (msg.toolCalls && msg.toolCalls.length > 0) {
const toolNames = msg.toolCalls.map(tc => tc.name);
content += ` [Called tools: ${toolNames.join(', ')}]`;
}
lines.push(`ASSISTANT: ${content}`);
}
else if (msg.role === 'tool') {
// Truncate long tool outputs for summarization
const output = msg.content.length > 500
? `${msg.content.slice(0, 500)}...`
: msg.content;
lines.push(`TOOL (${msg.name}): ${output}`);
}
// Skip system messages in summary input
}
return lines.join('\n\n');
}
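/*
 * Example output (illustrative):
 *
 *   USER: fix the failing parser test
 *
 *   ASSISTANT: Looking into it. [Called tools: read, edit]
 *
 *   TOOL (run_tests): 12 passed, 0 failed ...
 */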
/**
* Create a summarization callback using the given provider
*/
export function createSummarizationCallback(provider) {
return async (messages) => {
// Format messages into readable conversation
const conversationText = formatMessagesForSummary(messages);
// Create summarization prompt
const prompt = SUMMARIZATION_PROMPT.replace('{conversation}', conversationText);
// Call provider to generate summary (no tools needed)
const response = await provider.generate([{ role: 'user', content: prompt }], []);
return response.content || '';
};
}
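/*
 * Wiring sketch, assuming `provider` exposes the generate(messages, tools)
 * method used above:
 *
 *   const manager = createDefaultContextManager({
 *     summarizationCallback: createSummarizationCallback(provider),
 *   }, model);
 */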
//# sourceMappingURL=contextManager.js.map