@ooples/token-optimizer-mcp
Version:
Intelligent context window optimization for Claude Code - store content externally via caching and compression, freeing up your context window for what matters
179 lines • 6.32 kB
JavaScript
import { encoding_for_model } from 'tiktoken';
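/**
* Counts, estimates, and truncates tokens with a tiktoken encoder,
* approximating Claude models with the GPT-4 tokenizer.
*
* @example
* ```typescript
* // Minimal usage sketch; exact token counts depend on the selected tokenizer.
* const counter = new TokenCounter(); // auto-detects the model, falls back to 'gpt-4'
* const { tokens, characters } = counter.count('Hello, world!');
* const savings = counter.calculateCacheSavings('Large file content...');
* counter.free(); // release the encoder when done
* ```
*/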
export class TokenCounter {
encoder;
model;
constructor(model) {
// Auto-detect model from environment or use provided model
// Claude Code sets the CLAUDE_MODEL env var to the active model
// Falls back to GPT-4 as a universal approximation
this.model =
model ||
process.env.CLAUDE_MODEL ||
process.env.ANTHROPIC_MODEL ||
'gpt-4';
// Map Claude models to the closest tiktoken equivalent
// Claude's tokenization is broadly similar to GPT-4's, so this is a reasonable approximation
const tokenModel = this.mapToTiktokenModel(this.model);
// Initialize tiktoken encoder
this.encoder = encoding_for_model(tokenModel);
}
/**
* Map Claude/Anthropic models to tiktoken model names
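*
* @example
* ```typescript
* // Illustrative mappings (a sketch of the rules below, not an exhaustive list):
* // 'claude-sonnet-4'  -> 'gpt-4'          (Claude family approximated with GPT-4)
* // 'gpt-3.5-turbo'    -> 'gpt-3.5-turbo'
* // 'my-custom-model'  -> 'gpt-4'          (unknown models default to GPT-4)
* ```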
*/
mapToTiktokenModel(model) {
const lowerModel = model.toLowerCase();
// Claude models use the GPT-4 tokenizer as the closest approximation
if (lowerModel.includes('claude') ||
lowerModel.includes('sonnet') ||
lowerModel.includes('opus') ||
lowerModel.includes('haiku')) {
return 'gpt-4';
}
// GPT-4 variants
if (lowerModel.includes('gpt-4')) {
return 'gpt-4';
}
// GPT-3.5 variants
if (lowerModel.includes('gpt-3.5') || lowerModel.includes('gpt3.5')) {
return 'gpt-3.5-turbo';
}
// Default to GPT-4 for unknown models
return 'gpt-4';
}
/**
* Count tokens in text
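*
* @example
* ```typescript
* // Sketch: exact token counts come from the tiktoken encoder and vary by tokenizer.
* const counter = new TokenCounter();
* const result = counter.count('The quick brown fox');
* // result.characters === 19; result.tokens is the encoded length
* counter.free();
* ```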
*/
count(text) {
const tokens = this.encoder.encode(text);
return {
tokens: tokens.length,
characters: text.length,
};
}
/**
* Count tokens in multiple texts
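*
* @example
* ```typescript
* // Sketch: totals are the per-text token and character counts summed.
* const counter = new TokenCounter();
* const totals = counter.countBatch(['first chunk', 'second chunk']);
* // totals.tokens and totals.characters aggregate both strings
* counter.free();
* ```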
*/
countBatch(texts) {
let totalTokens = 0;
let totalCharacters = 0;
for (const text of texts) {
const result = this.count(text);
totalTokens += result.tokens;
totalCharacters += result.characters;
}
return {
tokens: totalTokens,
characters: totalCharacters,
};
}
/**
* Estimate token count without encoding (faster, less accurate)
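*
* @example
* ```typescript
* // Sketch: a 400-character string estimates to 100 tokens (Math.ceil(400 / 4)).
* const counter = new TokenCounter();
* counter.estimate('x'.repeat(400)); // 100
* ```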
*/
estimate(text) {
// Rough estimate: ~4 characters per token on average
return Math.ceil(text.length / 4);
}
/**
* Calculate token savings based on context window management
*
* @param originalText - The original text content
* @param contextTokens - Number of tokens remaining in LLM context (default: 0 for full caching)
* @returns Token savings calculation
*
* @remarks
* This method measures context window optimization, NOT compression ratio.
* When content is cached externally (SQLite, Redis, etc.), it's completely
* removed from the LLM's context window, resulting in 100% token savings.
*
* Use cases:
* - External caching: contextTokens = 0 (100% savings)
* - Metadata-only: contextTokens = tokens in metadata (e.g., 8)
* - Summarization: contextTokens = tokens in summary (e.g., 50)
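*
* @example
* ```typescript
* // Sketch of the metadata-only case; `originalContent` is a placeholder, not part of this module.
* const counter = new TokenCounter();
* const savings = counter.calculateSavings(originalContent, 8);
* // savings.tokensSaved === savings.originalTokens - 8
* // savings.percentSaved === (savings.tokensSaved / savings.originalTokens) * 100
* ```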
*/
calculateSavings(originalText, contextTokens = 0) {
const original = this.count(originalText);
const saved = original.tokens - contextTokens;
const percentSaved = original.tokens > 0 ? (saved / original.tokens) * 100 : 0;
return {
originalTokens: original.tokens,
contextTokens,
tokensSaved: saved,
percentSaved,
};
}
/**
* Calculate context window savings for externally cached content
*
* @param originalText - The original text content being cached
* @returns Token savings calculation with 100% savings
*
* @remarks
* When content is compressed and stored in an external cache (SQLite, Redis, etc.),
* it's completely removed from the LLM's context window. The compressed/encoded
* data is NEVER sent to the LLM, so we measure 100% token savings.
*
* Key insight: We're measuring CONTEXT WINDOW CLEARANCE, not compression ratio.
* - ✅ Content removed from LLM context (saves tokens)
* - ✅ Storage compressed (saves disk space)
* - ❌ Don't count tokens in compressed data (it's not sent to LLM!)
*
* @example
* ```typescript
* const tokenCounter = new TokenCounter();
* const content = "Large file content...";
* const compressed = compress(content);
*
* // Store in external cache
* await cache.set(key, compressed);
*
* // Calculate context window savings
* const savings = tokenCounter.calculateCacheSavings(content);
* // Returns: { originalTokens: 250, contextTokens: 0, tokensSaved: 250, percentSaved: 100 }
* ```
*/
calculateCacheSavings(originalText) {
const original = this.count(originalText);
return {
originalTokens: original.tokens,
contextTokens: 0, // External cache - nothing in context
tokensSaved: original.tokens, // 100% of original tokens saved
percentSaved: 100, // Always 100% for external caching
};
}
/**
* Check if text exceeds token limit
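*
* @example
* ```typescript
* // Sketch: guard before sending content into the context window; `fileContent` is a placeholder.
* const counter = new TokenCounter();
* if (counter.exceedsLimit(fileContent, 4096)) {
*   // cache externally or truncate instead of sending the full text
* }
* ```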
*/
exceedsLimit(text, limit) {
const result = this.count(text);
return result.tokens > limit;
}
/**
* Truncate text to fit within token limit
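*
* @example
* ```typescript
* // Sketch: truncation happens at token boundaries, so the cut may fall mid-word.
* // `longText` is a placeholder, not part of this module.
* const counter = new TokenCounter();
* const preview = counter.truncate(longText, 100); // at most 100 tokens of longText
* ```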
*/
truncate(text, maxTokens) {
const tokens = this.encoder.encode(text);
if (tokens.length <= maxTokens) {
return text;
}
const truncatedTokens = tokens.slice(0, maxTokens);
const decoded = this.encoder.decode(truncatedTokens);
// tiktoken's decode() may return a Uint8Array rather than a string; normalize to a string
return typeof decoded === 'string'
? decoded
: new TextDecoder().decode(decoded);
}
/**
* Get token-to-character ratio for text
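*
* @example
* ```typescript
* // Sketch: typical English prose averages roughly 4 characters per token.
* const counter = new TokenCounter();
* const ratio = counter.getTokenCharRatio('A short sample of ordinary English text.');
* ```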
*/
getTokenCharRatio(text) {
const result = this.count(text);
return result.tokens > 0 ? result.characters / result.tokens : 0;
}
/**
* Free the encoder resources
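*
* @remarks
* The underlying tiktoken encoder holds native (WASM) resources, so call this
* once the counter is no longer needed to avoid leaking memory.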
*/
free() {
this.encoder.free();
}
}
//# sourceMappingURL=token-counter.js.map