shipdeck
Ship MVPs in 48 hours. Fix bugs in 30 seconds. The command deck for developers who ship.
/**
* Token Manager for Anthropic API
* Handles token counting, context window management, and optimization
*/
// Token estimation based on Claude's tokenizer patterns
const TOKEN_ESTIMATION = {
// Average characters per token for different content types
code: 3.5,
prose: 4.0,
structured: 3.0,
// Special tokens
systemTokens: 10, // Overhead for system messages
messageOverhead: 5, // Overhead per message
// Safety margins
contextSafetyMargin: 0.1, // Reserve 10% of context
responseSafetyMargin: 0.2 // Reserve 20% for response
};
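// A worked example of the heuristic above (illustrative arithmetic, not exact tokenizer output):
// a 400-character prose snippet estimates to Math.ceil(400 / 4.0) = 100 tokens, while the same
// 400 characters of code would be Math.ceil(400 / 3.5) = 115 tokens before keyword adjustments.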
// Model configurations
const MODEL_LIMITS = {
'claude-3-5-sonnet-20241022': { contextWindow: 200000, maxOutput: 8192 },
'claude-3-5-haiku-20241022': { contextWindow: 200000, maxOutput: 8192 },
'claude-3-opus-20240229': { contextWindow: 200000, maxOutput: 4096 },
'claude-opus-4-1-20250805': { contextWindow: 200000, maxOutput: 32000 } // Claude Opus 4.1
};
class TokenManager {
constructor(model = 'claude-opus-4-1-20250805') {
this.model = model;
this.modelLimits = MODEL_LIMITS[model];
if (!this.modelLimits) {
throw new Error(`Unsupported model: ${model}. Supported models: ${Object.keys(MODEL_LIMITS).join(', ')}`);
}
// Cache for token estimates, plus hit/miss counters backing getCacheStats()
this.tokenCache = new Map();
this.cacheHits = 0;
this.cacheMisses = 0;
}
/**
* Estimate tokens for text content
*/
estimateTokens(text, contentType = 'prose') {
if (typeof text !== 'string') {
text = JSON.stringify(text);
}
// Check cache (keyed on content type, length, and a 100-char prefix; rare collisions are acceptable here)
const cacheKey = `${contentType}:${text.length}:${text.substring(0, 100)}`;
if (this.tokenCache.has(cacheKey)) {
  this.cacheHits++;
  return this.tokenCache.get(cacheKey);
}
this.cacheMisses++;
let estimate = 0;
const charsPerToken = TOKEN_ESTIMATION[contentType] || TOKEN_ESTIMATION.prose;
// Basic estimation
estimate = Math.ceil(text.length / charsPerToken);
// Adjustments for different content patterns
if (contentType === 'code') {
// Code has more special characters and keywords
const specialChars = (text.match(/[{}();,.\[\]]/g) || []).length;
const keywords = (text.match(/\b(function|class|interface|import|export|const|let|var|if|else|for|while|return)\b/g) || []).length;
estimate += specialChars * 0.1 + keywords * 0.2;
} else if (contentType === 'structured') {
// JSON/YAML structures are more compact
const brackets = (text.match(/[{}[\]]/g) || []).length;
estimate += brackets * 0.1;
}
// Round up and cache
estimate = Math.ceil(estimate);
this.tokenCache.set(cacheKey, estimate);
// Limit cache size
if (this.tokenCache.size > 1000) {
const firstKey = this.tokenCache.keys().next().value;
this.tokenCache.delete(firstKey);
}
return estimate;
}
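// Example (values follow the heuristic above, not a real tokenizer):
// estimateTokens('const x = 1;', 'code') → ceil(12 / 3.5) = 4 base tokens,
// plus 0.1 for the ';' and 0.2 for the 'const' keyword → Math.ceil(4.3) = 5.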
/**
* Estimate tokens for a message
*/
estimateMessageTokens(message) {
let tokens = TOKEN_ESTIMATION.messageOverhead;
if (message.role === 'system') {
tokens += TOKEN_ESTIMATION.systemTokens;
}
if (typeof message.content === 'string') {
tokens += this.estimateTokens(message.content);
} else if (Array.isArray(message.content)) {
// Multi-modal content
for (const content of message.content) {
if (content.type === 'text') {
tokens += this.estimateTokens(content.text);
} else if (content.type === 'image') {
// Image tokens: Anthropic prices images at roughly (width × height) / 750 tokens;
// with dimensions unknown here, fall back to a flat 1000-token estimate
tokens += 1000;
}
}
} else {
tokens += this.estimateTokens(JSON.stringify(message.content), 'structured');
}
return tokens;
}
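// Example: a user message with a 200-character string body estimates to
// 5 (messageOverhead) + Math.ceil(200 / 4.0) = 55 tokens.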
/**
* Estimate tokens for a conversation
*/
estimateConversationTokens(messages) {
return messages.reduce((total, message) => {
return total + this.estimateMessageTokens(message);
}, 0);
}
/**
* Check if conversation fits within context window
*/
checkContextWindow(messages, maxOutputTokens = 4096) {
const inputTokens = this.estimateConversationTokens(messages);
const safeContextLimit = Math.floor(this.modelLimits.contextWindow * (1 - TOKEN_ESTIMATION.contextSafetyMargin));
const availableForOutput = safeContextLimit - inputTokens;
// Clamp the requested output to what the model can actually produce
const cappedOutputTokens = Math.min(maxOutputTokens, this.modelLimits.maxOutput);
return {
inputTokens,
maxOutputTokens: cappedOutputTokens,
availableForOutput,
fitsInContext: inputTokens + cappedOutputTokens <= safeContextLimit,
utilizationPercent: (inputTokens / this.modelLimits.contextWindow) * 100,
recommendedMaxOutput: Math.min(
Math.floor(availableForOutput * (1 - TOKEN_ESTIMATION.responseSafetyMargin)),
this.modelLimits.maxOutput
)
};
}
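// Example shape, assuming a 200k-context model, ~150k input tokens, and maxOutputTokens = 4096:
// safeContextLimit = 180000, so the call returns roughly
// { inputTokens: 150000, availableForOutput: 30000, fitsInContext: true,
//   utilizationPercent: 75, recommendedMaxOutput: min(24000, modelLimits.maxOutput) }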
/**
* Optimize conversation for context window
*/
optimizeConversation(messages, maxOutputTokens = 4096, preserveSystemMessages = true) {
const analysis = this.checkContextWindow(messages, maxOutputTokens);
if (analysis.fitsInContext) {
return {
optimized: false,
messages,
analysis
};
}
let optimizedMessages = [...messages];
// Step 1: Preserve system messages (assumed to lead the conversation) and the most recent exchanges
const systemMessages = optimizedMessages.filter(msg => msg.role === 'system');
const middleStart = preserveSystemMessages ? systemMessages.length : 0;
const middleEnd = Math.max(middleStart, optimizedMessages.length - 3); // Keep the last 3 messages
const middleMessages = optimizedMessages.slice(middleStart, middleEnd);
const recentMessages = optimizedMessages.slice(middleEnd);
// Step 2: Calculate target tokens (a tighter 80% budget than checkContextWindow's 90%, for extra headroom while optimizing)
const safeContextLimit = Math.floor(this.modelLimits.contextWindow * 0.8);
const targetInputTokens = safeContextLimit - maxOutputTokens;
const currentTokens = this.estimateConversationTokens(optimizedMessages);
const reductionNeeded = currentTokens - targetInputTokens;
// Step 3: Compress or summarize middle messages
let finalMiddleMessages;
if (middleMessages.length > 10) {
// For many messages, create a summary
const summary = this._createMessageSummary(middleMessages);
finalMiddleMessages = [{
role: 'assistant',
content: `[Previous conversation summary: ${summary}]`
}];
} else {
// For fewer messages, try compression
finalMiddleMessages = this._compressMessages(middleMessages, reductionNeeded);
}
// Step 4: Reconstruct conversation
const finalMessages = [
...(preserveSystemMessages ? systemMessages : []),
...finalMiddleMessages,
...recentMessages
];
const optimizedTokens = this.estimateConversationTokens(finalMessages);
return {
optimized: true,
messages: finalMessages,
analysis: this.checkContextWindow(finalMessages, maxOutputTokens),
originalTokens: analysis.inputTokens,
optimizedTokens,
compressionRatio: optimizedTokens / analysis.inputTokens
};
}
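// The trade-off above: conversations with more than 10 middle messages are collapsed into a
// single summary turn, while shorter ones are compressed in place, so the most recent context
// (and, by default, system messages) always survives intact.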
/**
* Compress messages to fit within token budget
*/
_compressMessages(messages, targetTokenReduction) {
if (messages.length === 0 || targetTokenReduction <= 0) {
return messages;
}
const compressed = [];
let tokensSaved = 0;
for (const message of messages) {
const messageTokens = this.estimateMessageTokens(message);
if (tokensSaved >= targetTokenReduction) {
// We've saved enough tokens, keep remaining messages
compressed.push(message);
} else if (messageTokens > 1000) {
// Compress long messages
const compressionRatio = Math.max(0.3, 1 - (targetTokenReduction - tokensSaved) / messageTokens);
const compressedContent = this._compressContent(message.content, compressionRatio);
compressed.push({
...message,
content: compressedContent
});
tokensSaved += messageTokens - this.estimateTokens(compressedContent);
} else {
// Skip short messages to save tokens
tokensSaved += messageTokens;
}
}
// If we haven't saved enough, add a summary
if (tokensSaved < targetTokenReduction && messages.length > 0) {
const summary = this._createMessageSummary(messages);
return [{
role: 'assistant',
content: `[Conversation summary: ${summary}]`
}];
}
return compressed;
}
/**
* Compress content while preserving key information
*/
_compressContent(content, ratio) {
if (typeof content !== 'string') {
content = JSON.stringify(content);
}
const targetLength = Math.floor(content.length * ratio);
// Simple compression: keep beginning and end, summarize middle
if (content.length <= targetLength) {
return content;
}
const beginningLength = Math.floor(targetLength * 0.3);
const endLength = Math.floor(targetLength * 0.3);
const summaryLength = targetLength - beginningLength - endLength - 20; // Buffer for summary text
const beginning = content.substring(0, beginningLength);
const end = content.substring(content.length - endLength);
if (summaryLength > 20) {
const middleStart = beginningLength;
const middleEnd = content.length - endLength;
const middle = content.substring(middleStart, middleEnd);
const summary = middle.length > summaryLength ?
`...[${middle.length >= 1000 ? Math.floor(middle.length / 1000) + 'k' : middle.length} chars omitted]...` :
middle.substring(0, summaryLength) + '...';
return `${beginning}${summary}${end}`;
}
return `${beginning}...${end}`;
}
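// Example: 1000 chars at ratio 0.5 → targetLength = 500, keeping the first 150 and last 150
// characters with an omission marker in between (summaryLength = 500 - 150 - 150 - 20 = 180).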
/**
* Create a summary of messages
*/
_createMessageSummary(messages) {
const keyPoints = [];
for (const message of messages) {
if (message.role === 'user') {
const content = typeof message.content === 'string' ? message.content : JSON.stringify(message.content);
if (content.length > 50) {
keyPoints.push(`User asked: ${content.substring(0, 100)}...`);
}
} else if (message.role === 'assistant') {
const content = typeof message.content === 'string' ? message.content : JSON.stringify(message.content);
if (content.includes('```')) {
keyPoints.push('Assistant provided code solution');
} else if (content.length > 50) {
keyPoints.push(`Assistant: ${content.substring(0, 100)}...`);
}
}
}
return keyPoints.slice(0, 5).join('; '); // Keep top 5 points
}
/**
* Get optimal chunk size for long content
*/
getOptimalChunkSize(totalTokens, overlapTokens = 200) {
const maxChunkTokens = Math.floor(this.modelLimits.contextWindow * 0.7); // 70% of context window
if (totalTokens <= maxChunkTokens) {
return {
chunks: 1,
chunkSize: totalTokens,
overlap: 0,
processingStrategy: 'single'
};
}
const effectiveChunkSize = maxChunkTokens - overlapTokens;
const numChunks = Math.ceil(totalTokens / effectiveChunkSize);
return {
chunks: numChunks,
chunkSize: effectiveChunkSize,
overlap: overlapTokens,
processingStrategy: 'chunked',
estimatedTime: numChunks * 30 // Rough estimate in seconds
};
}
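// Example, assuming a 200k-context model and the default 200-token overlap:
// 300000 total tokens → maxChunkTokens = 140000, effective chunk = 139800,
// so chunks = Math.ceil(300000 / 139800) = 3.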
/**
* Split content into optimal chunks
*/
chunkContent(content, chunkInfo) {
if (chunkInfo.chunks === 1) {
return [content];
}
const chunks = [];
const contentLength = content.length;
const charsPerToken = TOKEN_ESTIMATION.prose;
const chunkChars = chunkInfo.chunkSize * charsPerToken;
const overlapChars = chunkInfo.overlap * charsPerToken;
let start = 0;
let chunkIndex = 0;
while (start < contentLength) {
const end = Math.min(start + chunkChars, contentLength);
let chunkText = content.substring(start, end);
// Try to break at natural boundaries (paragraph breaks or sentence ends)
if (end < contentLength) {
const lastNewline = chunkText.lastIndexOf('\n\n');
const lastSentence = chunkText.lastIndexOf('. ');
const breakPoint = Math.max(lastNewline, lastSentence);
if (breakPoint > chunkChars * 0.8) { // Don't break too early
chunkText = content.substring(start, start + breakPoint + 1);
}
}
chunks.push({
index: chunkIndex,
content: chunkText,
tokens: this.estimateTokens(chunkText),
start: start,
end: start + chunkText.length
});
// Stop once the final chunk reaches the end of the content
if (start + chunkText.length >= contentLength) break;
// Move the start position back by the overlap, always advancing to avoid an infinite loop
start = Math.max(start + 1, start + chunkText.length - overlapChars);
chunkIndex++;
}
return chunks;
}
/**
* Get model information
*/
getModelInfo() {
return {
model: this.model,
limits: this.modelLimits,
estimation: TOKEN_ESTIMATION
};
}
/**
* Clear token cache
*/
clearCache() {
this.tokenCache.clear();
this.cacheHits = 0;
this.cacheMisses = 0;
}
/**
* Get cache statistics
*/
getCacheStats() {
return {
size: this.tokenCache.size,
hitRate: this.cacheHits / (this.cacheHits + this.cacheMisses) || 0
};
}
}
module.exports = { TokenManager, MODEL_LIMITS, TOKEN_ESTIMATION };
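
// A minimal usage sketch (illustrative only; the message contents and model choice are
// placeholders, and token counts are heuristic estimates rather than real tokenizer output):
if (require.main === module) {
const manager = new TokenManager('claude-3-5-sonnet-20241022');
const messages = [
{ role: 'system', content: 'You are a concise coding assistant.' },
{ role: 'user', content: 'Refactor this recursive function into an iterative one.' }
];
const analysis = manager.checkContextWindow(messages, 4096);
console.log(`Input tokens: ${analysis.inputTokens}, fits: ${analysis.fitsInContext}`);
const result = manager.optimizeConversation(messages, 4096);
console.log(result.optimized
? `Compressed to ${result.optimizedTokens} tokens (ratio ${result.compressionRatio.toFixed(2)})`
: 'No optimization needed');
}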