UNPKG

@tehreet/conduit

Version:

LLM API gateway with intelligent routing, robust process management, and health monitoring

389 lines 12.8 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.tokenCounter = exports.TokenCounter = void 0; const tiktoken_1 = require("tiktoken"); const log_1 = require("./log"); class TokenCounter { constructor() { this.encoders = new Map(); this.cache = new Map(); this.batchQueue = []; this.batchTimer = null; this.batchResolvers = new Map(); this.options = { useCache: true, cacheMaxAge: 5 * 60 * 1000, // 5 minutes enableBatching: true, batchSize: 10, batchDelay: 100 // 100ms }; } /** * Count tokens for a given text and model */ async countTokens(text, model = 'claude-3-5-sonnet-20241022', options = {}) { const opts = { ...this.options, ...options }; // Check cache first if (opts.useCache) { const cached = this.getCachedResult(text, model); if (cached) { return { ...cached, method: 'cached' }; } } try { // Map Claude models to tiktoken models for approximation const tiktokenModel = this.mapToTiktokenModel(model); if (!this.encoders.has(tiktokenModel)) { const encoder = (0, tiktoken_1.encoding_for_model)(tiktokenModel); this.encoders.set(tiktokenModel, encoder); } const encoder = this.encoders.get(tiktokenModel); const tokens = encoder.encode(text); const result = { count: tokens.length, method: 'tiktoken', }; // Cache the result if (opts.useCache) { this.setCachedResult(text, model, result); } return result; } catch (error) { (0, log_1.log)('Token counting error, falling back to estimation:', error); // Fallback to simple estimation const result = { count: this.estimateTokens(text), method: 'estimate', }; // Cache the estimation result too if (opts.useCache) { this.setCachedResult(text, model, result); } return result; } } /** * Count tokens for a message array */ async countMessagesTokens(messages, model = 'claude-3-5-sonnet-20241022') { // Combine all message content const fullText = messages .map(msg => `${msg.role}: ${msg.content}`) .join('\n\n'); // Add some overhead for message structure const result = await this.countTokens(fullText, model); result.count = Math.ceil(result.count * 1.1); // 10% overhead for structure return result; } /** * Simple token estimation (roughly 4 chars per token) */ estimateTokens(text) { // More sophisticated estimation based on Claude's patterns const words = text.split(/\s+/).length; const chars = text.length; // Average between word count * 1.3 and char count / 4 const wordEstimate = words * 1.3; const charEstimate = chars / 4; return Math.ceil((wordEstimate + charEstimate) / 2); } /** * Map Claude model names to tiktoken models for approximation */ mapToTiktokenModel(claudeModel) { // Claude uses a similar tokenizer to GPT-4 if (claudeModel.includes('claude-3')) { return 'gpt-4'; } // Default to GPT-4 for best approximation return 'gpt-4'; } /** * Count tokens from streaming content */ createStreamCounter(model = 'claude-3-5-sonnet-20241022') { let totalTokens = 0; let buffer = ''; const self = this; return { addChunk(chunk) { buffer += chunk; // Count complete sentences/paragraphs to avoid recounting const completeBlocks = buffer.split(/\n\n/); if (completeBlocks.length > 1) { // Keep the last incomplete block in buffer buffer = completeBlocks.pop() || ''; // Count tokens in complete blocks for (const block of completeBlocks) { const tokens = self.estimateTokens(block); totalTokens += tokens; } } }, getTotal() { // Add remaining buffer if (buffer) { totalTokens += self.estimateTokens(buffer); buffer = ''; } return totalTokens; }, }; } /** * Batch count tokens for multiple texts */ async batchCountTokens(requests, options = {}) { const opts = { ...this.options, ...options }; if (!opts.enableBatching) { // Process individually if batching is disabled const results = []; for (const request of requests) { const result = await this.countTokens(request.text, request.model, opts); results.push({ id: request.id, count: result.count, method: result.method }); } return results; } // Process in batches const results = []; const batchSize = opts.batchSize || 10; for (let i = 0; i < requests.length; i += batchSize) { const batch = requests.slice(i, i + batchSize); const batchResults = await Promise.all(batch.map(async (request) => { const result = await this.countTokens(request.text, request.model, opts); return { id: request.id, count: result.count, method: result.method }; })); results.push(...batchResults); } return results; } /** * Count tokens with batching support (queued) */ async countTokensBatched(text, model = 'claude-3-5-sonnet-20241022', options = {}) { const opts = { ...this.options, ...options }; if (!opts.enableBatching) { return this.countTokens(text, model, opts); } return new Promise((resolve) => { const id = `${Date.now()}-${Math.random().toString(36).substr(2, 9)}`; this.batchResolvers.set(id, (result) => { resolve({ count: result.count, method: result.method }); }); this.batchQueue.push({ id, text, model }); this.scheduleBatchProcessing(); }); } /** * Get model-specific token counting configuration */ getModelConfig(model) { // Different models have different token characteristics if (model.includes('claude-3-5-sonnet')) { return { multiplier: 1.0, overhead: 0 }; } if (model.includes('claude-3-5-haiku')) { return { multiplier: 1.0, overhead: 0 }; } if (model.includes('claude-3-opus')) { return { multiplier: 1.0, overhead: 0 }; } if (model.includes('gpt-4')) { return { multiplier: 1.0, overhead: 0 }; } if (model.includes('gpt-3.5')) { return { multiplier: 0.9, overhead: 10 }; } // Default configuration return { multiplier: 1.0, overhead: 0 }; } /** * Update token counter options */ updateOptions(options) { this.options = { ...this.options, ...options }; } /** * Get current options */ getOptions() { return { ...this.options }; } /** * Clear token count cache */ clearCache() { this.cache.clear(); } /** * Get cache statistics */ getCacheStats() { if (this.cache.size === 0) { return { size: 0, hitRate: 0, oldestEntry: null, newestEntry: null }; } const entries = Array.from(this.cache.values()); const timestamps = entries.map(e => e.timestamp); return { size: this.cache.size, hitRate: 0, // Would need to track hits/misses for this oldestEntry: new Date(Math.min(...timestamps)), newestEntry: new Date(Math.max(...timestamps)) }; } /** * Schedule batch processing */ scheduleBatchProcessing() { if (this.batchTimer) { return; } this.batchTimer = setTimeout(() => { this.processBatch(); }, this.options.batchDelay || 100); } /** * Process the current batch queue */ async processBatch() { if (this.batchQueue.length === 0) { this.batchTimer = null; return; } const batch = this.batchQueue.splice(0, this.options.batchSize || 10); this.batchTimer = null; try { const results = await this.batchCountTokens(batch); for (const result of results) { const resolver = this.batchResolvers.get(result.id); if (resolver) { resolver(result); this.batchResolvers.delete(result.id); } } } catch (error) { (0, log_1.log)('Batch processing error:', error); // Resolve with error fallback for (const request of batch) { const resolver = this.batchResolvers.get(request.id); if (resolver) { resolver({ id: request.id, count: this.estimateTokens(request.text), method: 'estimate' }); this.batchResolvers.delete(request.id); } } } // Schedule next batch if queue is not empty if (this.batchQueue.length > 0) { this.scheduleBatchProcessing(); } } /** * Get cached result */ getCachedResult(text, model) { const cacheKey = this.getCacheKey(text, model); const cached = this.cache.get(cacheKey); if (!cached) { return null; } // Check if cache entry is still valid const now = Date.now(); const age = now - cached.timestamp; if (age > (this.options.cacheMaxAge || 5 * 60 * 1000)) { this.cache.delete(cacheKey); return null; } return cached.result; } /** * Set cached result */ setCachedResult(text, model, result) { const cacheKey = this.getCacheKey(text, model); this.cache.set(cacheKey, { text, model, result, timestamp: Date.now() }); // Clean up old entries periodically if (this.cache.size > 1000) { this.cleanupCache(); } } /** * Generate cache key */ getCacheKey(text, model) { // Use a simple hash of text + model for cache key return `${model}:${text.length}:${this.simpleHash(text)}`; } /** * Simple hash function for cache keys */ simpleHash(str) { let hash = 0; for (let i = 0; i < str.length; i++) { const char = str.charCodeAt(i); hash = ((hash << 5) - hash) + char; hash = hash & hash; // Convert to 32bit integer } return hash.toString(36); } /** * Clean up old cache entries */ cleanupCache() { const now = Date.now(); const maxAge = this.options.cacheMaxAge || 5 * 60 * 1000; for (const [key, entry] of this.cache.entries()) { if (now - entry.timestamp > maxAge) { this.cache.delete(key); } } } /** * Clean up encoders to free memory */ cleanup() { for (const encoder of this.encoders.values()) { if (encoder.free) { encoder.free(); } } this.encoders.clear(); this.cache.clear(); if (this.batchTimer) { clearTimeout(this.batchTimer); this.batchTimer = null; } this.batchQueue.length = 0; this.batchResolvers.clear(); } } exports.TokenCounter = TokenCounter; // Singleton instance exports.tokenCounter = new TokenCounter(); //# sourceMappingURL=token-counter.js.map